import numpy as np
from sklearn.feature_extraction import image
from sklearn.cluster import MiniBatchKMeans
from sklearn import cross_validation, svm, datasets
from sklearn.datasets import fetch_olivetti_faces, fetch_mldata
from matplotlib import pylab as pl
def HIK_kernel(X, Y):
    return np.array([[np.sum(np.minimum(x, y)) for y in Y] for x in X])
# http://media.nips.cc/nipsbooks/nipspapers/paper_files/nips26/1226.pdf
def chi_square_kernel(X, Y):
    return np.array([[2 * np.sum(x * y / (x + y + 1e-16)) for y in Y] for x in X])
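# Quick sanity check of the two kernels on toy histograms (illustrative only;
# the values below are made up and are not part of the experiment). HIK sums
# the element-wise minima, and the chi-square kernel of a vector with itself
# reduces to 2*sum(x*x / (2*x)) = sum(x).
_hx = np.array([[0.2, 0.5, 0.3]])
_hy = np.array([[0.5, 0.1, 0.4]])
assert np.allclose(HIK_kernel(_hx, _hy), 0.2 + 0.1 + 0.3)
assert np.allclose(chi_square_kernel(_hx, _hx), _hx.sum())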
# from the Coates, Ng et al. paper
# some code inspired from here:
# http://nbviewer.ipython.org/github/jaberg/IPythonTheanoTutorials/blob/master/ipynb/Preprocessing%20-%20Image%20Whitening.ipynb?create=1
class kMeansCoder:
    def __init__(self, num_clusters=256, w=(4, 4), beta=10.0, gamma=0.01):
        self.num_clusters = num_clusters
        self.w = w
        self.beta = beta
        self.gamma = gamma
        self.patches_per_image = None
    def fit(self, images):
        patches = self.get_patches(images)
        # demean and contrast normalize
        xm = patches.mean(1)[:, None]
        Xc = patches - xm  # centered
        l2 = (Xc * Xc).sum(axis=1)
        div2 = np.sqrt(l2[:, None] + self.beta)
        X = Xc / div2
        # ZCA whitening
        C = np.dot(X.T, X) / (X.shape[0] - 1)  # sample covariance -> assuming zero mean
        D, V = np.linalg.eigh(C)  # decomposition
        self.P = np.dot(np.sqrt(1.0 / (D + self.gamma)) * V, V.T)  # the transform
        X_white_flat = np.dot(X, self.P)
        # fit the k-means to X_white_flat
        self.kmeans = MiniBatchKMeans(n_clusters=self.num_clusters)
        self.kmeans.fit(X_white_flat)
        # get the actual histograms for training
        m = len(images)
        histograms = np.zeros((m, self.num_clusters))
        bins = np.arange(self.num_clusters + 1)
        for j in xrange(len(images)):
            local_patches = X_white_flat[self.patches_per_image * j:self.patches_per_image * (j + 1)]
            h, b = np.histogram(self.kmeans.predict(local_patches), bins, normed=True)
            histograms[j, :] = h
        return histograms
    def transform(self, images):
        patches = self.get_patches(images)
        Xc = patches - patches.mean(1)[:, None]
        l2 = (Xc * Xc).sum(axis=1)
        div2 = np.sqrt(l2[:, None] + self.beta)
        X = Xc / div2  # contrast normalize as in training
        X_white_flat = np.dot(X, self.P)  # whiten with the transform learned in training
        m = len(images)
        histograms = np.zeros((m, self.num_clusters))
        bins = np.arange(self.num_clusters + 1)
        for j in xrange(len(images)):
            local_patches = X_white_flat[self.patches_per_image * j:self.patches_per_image * (j + 1)]
            h, b = np.histogram(self.kmeans.predict(local_patches), bins, normed=True)
            histograms[j, :] = h
        return histograms
    def get_patches(self, images, randfrac=0.5):
        n = images.shape[0]
        if self.patches_per_image is None:
            self.patches_per_image = (images.shape[1] - self.w[0] + 1) * (images.shape[2] - self.w[1] + 1)
            self.patches_per_image = int(self.patches_per_image * randfrac)
        patches = np.zeros((self.patches_per_image * n, np.prod(self.w)))
        for j, curr_image in enumerate(images):
            patch = image.extract_patches_2d(curr_image, self.w, max_patches=randfrac)  # random subset of patches for this image
            flat_patches = patch.reshape(self.patches_per_image, np.prod(self.w))
            patches[self.patches_per_image * j:self.patches_per_image * (j + 1)] = flat_patches
        return patches
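# Minimal shape check for the coder (illustrative only; the tiny random
# "images" below are synthetic and carry no signal). fit() should return one
# num_clusters-dimensional histogram per input image.
_demo_images = np.random.rand(3, 12, 12)
_demo_coder = kMeansCoder(num_clusters=8, w=(4, 4))
assert _demo_coder.fit(_demo_images).shape == (3, 8)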
# change this to do the experiment with a different dataset
data_ind = 0
# images are 2d -> can extract patches
# data are 1d -> images flattened
if data_ind == 0:
    C_vec = [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]
    w = (4, 4)
    dataset = datasets.load_digits()
    images = dataset.images
    labels = dataset.target
    name = 'Digits'
elif data_ind == 1:
    C_vec = [1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6]
    w = (8, 8)
    dataset = fetch_olivetti_faces()
    images = dataset.images
    labels = dataset.target
    name = 'Olivetti'
elif data_ind == 2:
    C_vec = [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]
    w = (16, 16)
    dataset = fetch_mldata('MNIST original')
    n = dataset.data.shape[0]
    images = dataset.data.reshape((n, 28, 28))
    labels = dataset.target
    name = 'MNIST (small)'
    # take a small random subset
    subset = np.arange(n)
    np.random.shuffle(subset)
    subset = subset[:int(n * 0.1)]  # using a random tenth of MNIST
    images = images[subset, :, :]
    labels = labels[subset]  # labels are 1-d, so index with the subset only
# split up into training and test 50/50
del dataset
images_train, images_test, y_train, y_test = cross_validation.train_test_split(images, labels, test_size=0.5, random_state=0)
del images
del labels
# train kmeans encoder
coder = kMeansCoder(w=w)
X_train = coder.fit(images_train)
X_test = coder.transform(images_test)
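# Each row of X_train / X_test is a normalized histogram over the learned
# codewords (unit-width bins with normed=True sum to 1), which is the setting
# the histogram kernels above assume. Illustrative check:
assert np.allclose(X_train.sum(axis=1), 1.0)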
# prefetching costly kernels
K_HIK = HIK_kernel(X_train,X_train)
K_HIK_test = HIK_kernel(X_test,X_train)
K_chi = chi_square_kernel(X_train,X_train)
K_chi_test = chi_square_kernel(X_test,X_train)
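# Note: with kernel='precomputed', sklearn's SVC expects the training Gram
# matrix to be (n_train, n_train) and the test kernel matrix to be
# (n_test, n_train), which is what the (X, X_train) argument order above gives.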
# train SVM with different kernels
HIK_score = []
chi_square_score = []
rbf_score = []
linear_score = []
for c in C_vec:
    clf = svm.SVC(kernel='precomputed', C=c)
    clf.fit(K_HIK, y_train)
    HIK_score.append(clf.score(K_HIK_test, y_test))
    clf = svm.SVC(kernel='precomputed', C=c)
    clf.fit(K_chi, y_train)
    chi_square_score.append(clf.score(K_chi_test, y_test))
    clf = svm.SVC(kernel='rbf', C=c)
    clf.fit(X_train, y_train)
    rbf_score.append(clf.score(X_test, y_test))
    clf = svm.SVC(kernel='linear', C=c)
    clf.fit(X_train, y_train)
    linear_score.append(clf.score(X_test, y_test))
pl.semilogx(C_vec,HIK_score,label='HIK',linewidth=4,basex=10)
pl.semilogx(C_vec,chi_square_score,label='Chi-Square Kernel',linewidth=4,basex=10)
pl.semilogx(C_vec,rbf_score,label='RBF Kernel',linewidth=4,basex=10)
pl.semilogx(C_vec,linear_score,label='Linear Kernel',linewidth=4,basex=10)
pl.title(name + ' Dataset Results')
pl.xlabel('C (SVM Parameter)')
pl.ylabel('Test Accuracy')
pl.legend(loc='lower right')
pl.show()