Skip to content

Instantly share code, notes, and snippets.

@aeweiwi
Last active December 30, 2015 06:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aeweiwi/7788156 to your computer and use it in GitHub Desktop.
Save aeweiwi/7788156 to your computer and use it in GitHub Desktop.
# (C) Abdalrahman Eweiwi, December 2013
# License: BSD 3 clause
from scipy.spatial.distance import norm, cdist
import numpy as np
from sklearn import datasets, svm, metrics, cross_validation
import scipy.linalg as linalg
from sklearn.metrics.pairwise import linear_kernel,rbf_kernel,chi2_kernel
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.grid_search import GridSearchCV
class kpls(BaseEstimator, RegressorMixin):
def intersection_dist(self,v1,v2):
return np.sum((np.minimum(v1,v2)))
#***********************************************************************
#***********************************************************************
def intersection_kernel(self,m1,m2):
if m2 is None:
return cdist(m1,m1,self.intersection_dist)
else:
return cdist(m1,m2,self.intersection_dist)
#***********************************************************************
#***********************************************************************
def get_kernel(self,Xtr,Xtst=None):
"""
X : train features
Y : test features
if Y is not defined XX.T returned
else XY.T returned
"""
#if Y is None, K = <phi(X),phi(X)> else K = <phi(X),phi(Y)>
if self.kernel == 'rbf':
return rbf_kernel(Xtr,Xtst,self.gamma).T
elif self.kernel == 'chi2':
return chi2_kernel(Xtr,Xtst,self.gamma ).T
elif self.kernel == 'linear':
return linear_kernel(Xtr,Xtst).T
elif self.kernel == 'intersection':
return self.intersection_kernel(Xtr,Xtst).T
#***********************************************************************
#***********************************************************************
def center_data(self,mat,scale=False):
matc = mat.copy()
matm = matc.mean(axis=0)
matc -= matm
if scale:
matc_std = matc.std(axis=0,ddof=1)
matc_std[matc_std == 0 ] = 1
matc /= matc_std
else:
matc_std = np.ones(matc.shape[1])
return matc,matm,matc_std
#***********************************************************************
#***********************************************************************
def center_test_kernel(self):
In = np.eye(self.ntr)
n1 = np.ones((self.ntr,1))
nt1 = np.ones((self.ntst,1))
h_r = In - (1./self.ntr)* np.dot(n1,n1.T)
h_l = self.Ktst - (1./self.ntr)* np.dot(nt1,np.dot(n1.T,self.Ktr))
Ktstc = np.dot(h_l,h_r)
return Ktstc
#***********************************************************************
#***********************************************************************
def center_train_kernel(self):
O = (1./self.ntr)*np.ones((self.ntr,self.ntr))
In = np.eye(self.ntr)
Ktrc = np.dot((In-O),np.dot(self.Ktr,(In-O)))
return Ktrc
#***********************************************************************
#***********************************************************************
def get_weight_vec(self,Ktrc,Yc):
XTY = np.dot(Ktrc.T,Yc)
u,s,vh = linalg.svd(XTY,full_matrices=False)
v = vh.T
return u[:,[0]],v[:,[0]]
#***********************************************************************
#***********************************************************************
def fit(self,Xtr,Ytr,Ktr=None,**param):
"""
Xtr: training samples [samples_count X samples_dimension]
Ytr: response [samples_count X response_dimension]
Ktr: precomputed kernel
"""
if param is not None:
self.set_params(**param)
if Ktr is None:
self.Xtr = Xtr
self.Ktr = self.get_kernel(self.Xtr)
else:
self.Ktr = Ktr
#perform hot-encoding if the purpose is classification
if self.purpose == 'classification':
self.Ytr = self.format_labels(Ytr)
elif self.purpose == 'regression':
self.Ytr = Ytr[:,np.newaxis]
self.resp_dim = self.Ytr.shape[1]
self.ntr = self.Ktr.shape[0]
#center train and test kernels
self.Ktrc = self.center_train_kernel()
self.Yc,self.Ym,self.Ystd = self.center_data(self.Ytr,self.scale)
#initilize matrices
T = np.zeros((self.ntr,self.components))
U = np.zeros((self.ntr,self.components))
C = np.zeros((self.resp_dim,self.components))
Yc_copy = self.Yc.copy()
Ktrc_copy = self.Ktrc.copy()
for i in range(self.components):
t,z = self.get_weight_vec(Ktrc_copy,Yc_copy)
c = np.dot(Yc_copy.T,t)
u = np.dot(Yc_copy,c)
u = u/norm(u)
In = np.eye(self.ntr)
Ktrc_copy = np.dot( (In - np.dot(t,t.T)), np.dot( Ktrc_copy , (In - np.dot(t,t.T)) ) )
Yc_copy -= np.dot(t,np.dot(t.T,Yc_copy))
C[:,i] = c.squeeze()
T[:,i] = t.squeeze()
U[:,i] = u.squeeze()
self.T = T
self.U = U
self.C = C
#***********************************************************************
#***********************************************************************
def format_labels(self,Y):
num_of_classes = len(set(Y))
Yf = np.zeros((Y.shape[0],num_of_classes))
Yf[np.arange(Y.shape[0]),Y] = 1
return Yf
#***********************************************************************
#***********************************************************************
def __init__(self,components=40,kernel='intersection',gamma=None,purpose='classification',scale =True):
"""
Initilize kpls
kernel:['linear','rbf','intersection','chi2']
gamma: kernel parameter used for rbf
scale: scale the input data to have standard deviation
purpose: ['classification','regression']
"""
self.gamma = gamma
self.kernel = kernel
self.components = components
self.scale = scale
self.purpose = purpose
#***********************************************************************
#***********************************************************************
def predict(self,Xtst):
self.ntst = Xtst.shape[0]
self.Ktst = self.get_kernel(self.Xtr,Xtst)
self.Ktstc = self.center_test_kernel()
Y_pred = np.dot(self.Ktstc,
np.dot(self.U,np.dot(
linalg.inv(1e-5*np.eye(self.components) +np.dot(self.T.T,np.dot(self.Ktrc,self.U))),
np.dot(self.T.T,self.Yc))))
Y_pred = Y_pred + self.Ym
if self.purpose == 'classification':
self.proba_ = Y_pred
return self.proba_.argmax(1)
elif self.purpose == 'regression':
return Y_pred
else:
return Y_pred
#***********************************************************************
#***********************************************************************
def predict_kernel(self,Ktst):
self.ntst = Ktst.shape[0]
self.Ktst = Ktst
self.Ktstc = self.center_test_kernel()
Y_pred = np.dot(self.Ktstc,
np.dot(self.U,np.dot(
linalg.inv(1e-5*np.eye(self.components) +np.dot(self.T.T,np.dot(self.Ktrc,self.U))),
np.dot(self.T.T,self.Yc))))
Y_pred = Y_pred + self.Ym
return Y_pred
#***********************************************************************
#***********************************************************************
def evaluate_estimator(estimator,tuned_parameters,X_train,y_train,X_test,y_test):
# Set the parameters by cross-validation
cv = cross_validation.StratifiedShuffleSplit(y_train,5,test_size=0.2,random_state=0)
clf = GridSearchCV(estimator, tuned_parameters, cv=cv,verbose=0,n_jobs=1,score_func=metrics.precision_score)
clf.fit(X_train, y_train)
print("Best parameters set found on development set:")
print()
print(clf.best_estimator_)
print()
print("Grid scores on development set:")
print()
for params, mean_score, scores in clf.grid_scores_:
print("%0.3f (+/-%0.03f) for %r"
% (mean_score, scores.std() / 2, params))
print()
print("Detailed classification report:")
print()
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.")
print()
y_true, y_pred = y_test, clf.predict(X_test)
print(metrics.classification_report(y_true, y_pred))
print()
if __name__ == '__main__':
kpls_tuned_parameters = [{'kernel':['chi2'],'components':[40,60,80,100],'gamma':[0.005,0.1,0.01,1e-3,1e-4]},
{'kernel': ['linear'], 'components': [20,40, 60,80,100]}]
svm_tuned_parameters = [{'kernel':['rbf'],'C':[0.1,0.01,1,10,100],'gamma':[0.005,0.1,0.01,1e-3,1e-4]},
{'kernel': ['linear'], 'C':[0.1,0.01,1,10,100]}]
kpls_clf = kpls()
svm_clf = svm.SVC()
digits = datasets.load_digits()
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
targets = digits.target
cv = cross_validation.ShuffleSplit(n_samples,5,0.5)
report = []
print 'Evaluating KPLS and SKLEARN SVC for classification on the digits dataset'
itr = 1
for train,test in cv:
X_train = data[train,:]
X_test = data[test,:]
y_train = targets[train]
y_test = targets[test]
print 'KPLS evaluation report on split %d'%itr
print '------------------------------------------------------------------------'
evaluate_estimator(kpls_clf,kpls_tuned_parameters,X_train,y_train,X_test,y_test)
print '------------------------------------------------------------------------'
print 'SVM evaluation report on split %d'%itr
print '------------------------------------------------------------------------'
evaluate_estimator(svm_clf,svm_tuned_parameters,X_train,y_train,X_test,y_test)
print '------------------------------------------------------------------------'
itr+=1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment