@mblondel
mblondel/svm.py: Support Vector Machines
Last active Oct 15, 2020
# Mathieu Blondel, September 2010
# License: BSD 3 clause

import numpy as np
from numpy import linalg
import cvxopt
import cvxopt.solvers


def linear_kernel(x1, x2):
    return np.dot(x1, x2)


def polynomial_kernel(x, y, p=3):
    return (1 + np.dot(x, y)) ** p


def gaussian_kernel(x, y, sigma=5.0):
    return np.exp(-linalg.norm(x - y) ** 2 / (2 * (sigma ** 2)))


class SVM(object):

    def __init__(self, kernel=linear_kernel, C=None):
        self.kernel = kernel
        self.C = C
        if self.C is not None:
            self.C = float(self.C)

    def fit(self, X, y):
        n_samples, n_features = X.shape

        # Gram matrix
        K = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                K[i, j] = self.kernel(X[i], X[j])

        # Dual QP: minimize (1/2) a^T P a + q^T a  s.t.  G a <= h,  A a = b
        P = cvxopt.matrix(np.outer(y, y) * K)
        q = cvxopt.matrix(np.ones(n_samples) * -1)
        # cvxopt requires a float ('d') matrix, so cast the labels
        A = cvxopt.matrix(y.astype(float), (1, n_samples))
        b = cvxopt.matrix(0.0)

        if self.C is None:
            # Hard margin: a_i >= 0
            G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))
            h = cvxopt.matrix(np.zeros(n_samples))
        else:
            # Soft margin: 0 <= a_i <= C
            tmp1 = np.diag(np.ones(n_samples) * -1)
            tmp2 = np.identity(n_samples)
            G = cvxopt.matrix(np.vstack((tmp1, tmp2)))
            tmp1 = np.zeros(n_samples)
            tmp2 = np.ones(n_samples) * self.C
            h = cvxopt.matrix(np.hstack((tmp1, tmp2)))

        # Solve QP problem
        solution = cvxopt.solvers.qp(P, q, G, h, A, b)

        # Lagrange multipliers
        a = np.ravel(solution['x'])

        # Support vectors have non-zero Lagrange multipliers
        sv = a > 1e-5
        ind = np.arange(len(a))[sv]
        self.a = a[sv]
        self.sv = X[sv]
        self.sv_y = y[sv]
        print("%d support vectors out of %d points" % (len(self.a), n_samples))

        # Intercept, averaged over the support vectors
        self.b = 0
        for n in range(len(self.a)):
            self.b += self.sv_y[n]
            self.b -= np.sum(self.a * self.sv_y * K[ind[n], sv])
        self.b /= len(self.a)

        # Weight vector (explicit only for the linear kernel)
        if self.kernel == linear_kernel:
            self.w = np.zeros(n_features)
            for n in range(len(self.a)):
                self.w += self.a[n] * self.sv_y[n] * self.sv[n]
        else:
            self.w = None

    def project(self, X):
        if self.w is not None:
            return np.dot(X, self.w) + self.b
        else:
            y_predict = np.zeros(len(X))
            for i in range(len(X)):
                s = 0
                for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):
                    s += a * sv_y * self.kernel(X[i], sv)
                y_predict[i] = s
            return y_predict + self.b

    def predict(self, X):
        return np.sign(self.project(X))


if __name__ == "__main__":
    import matplotlib.pyplot as pl

    def gen_lin_separable_data():
        # generate training data in the 2-d case
        mean1 = np.array([0, 2])
        mean2 = np.array([2, 0])
        cov = np.array([[0.8, 0.6], [0.6, 0.8]])
        X1 = np.random.multivariate_normal(mean1, cov, 100)
        y1 = np.ones(len(X1))
        X2 = np.random.multivariate_normal(mean2, cov, 100)
        y2 = np.ones(len(X2)) * -1
        return X1, y1, X2, y2

    def gen_non_lin_separable_data():
        mean1 = [-1, 2]
        mean2 = [1, -1]
        mean3 = [4, -4]
        mean4 = [-4, 4]
        cov = [[1.0, 0.8], [0.8, 1.0]]
        X1 = np.random.multivariate_normal(mean1, cov, 50)
        X1 = np.vstack((X1, np.random.multivariate_normal(mean3, cov, 50)))
        y1 = np.ones(len(X1))
        X2 = np.random.multivariate_normal(mean2, cov, 50)
        X2 = np.vstack((X2, np.random.multivariate_normal(mean4, cov, 50)))
        y2 = np.ones(len(X2)) * -1
        return X1, y1, X2, y2

    def gen_lin_separable_overlap_data():
        # generate training data in the 2-d case
        mean1 = np.array([0, 2])
        mean2 = np.array([2, 0])
        cov = np.array([[1.5, 1.0], [1.0, 1.5]])
        X1 = np.random.multivariate_normal(mean1, cov, 100)
        y1 = np.ones(len(X1))
        X2 = np.random.multivariate_normal(mean2, cov, 100)
        y2 = np.ones(len(X2)) * -1
        return X1, y1, X2, y2

    def split_train(X1, y1, X2, y2):
        # first 90 points of each class for training
        X_train = np.vstack((X1[:90], X2[:90]))
        y_train = np.hstack((y1[:90], y2[:90]))
        return X_train, y_train

    def split_test(X1, y1, X2, y2):
        # remaining points of each class for testing
        X_test = np.vstack((X1[90:], X2[90:]))
        y_test = np.hstack((y1[90:], y2[90:]))
        return X_test, y_test

    def plot_margin(X1_train, X2_train, clf):
        def f(x, w, b, c=0):
            # given x, return y such that [x, y] is on the line w.x + b = c
            return (-w[0] * x - b + c) / w[1]

        pl.plot(X1_train[:, 0], X1_train[:, 1], "ro")
        pl.plot(X2_train[:, 0], X2_train[:, 1], "bo")
        pl.scatter(clf.sv[:, 0], clf.sv[:, 1], s=100, c="g")

        # w.x + b = 0
        a0 = -4; a1 = f(a0, clf.w, clf.b)
        b0 = 4; b1 = f(b0, clf.w, clf.b)
        pl.plot([a0, b0], [a1, b1], "k")

        # w.x + b = 1
        a0 = -4; a1 = f(a0, clf.w, clf.b, 1)
        b0 = 4; b1 = f(b0, clf.w, clf.b, 1)
        pl.plot([a0, b0], [a1, b1], "k--")

        # w.x + b = -1
        a0 = -4; a1 = f(a0, clf.w, clf.b, -1)
        b0 = 4; b1 = f(b0, clf.w, clf.b, -1)
        pl.plot([a0, b0], [a1, b1], "k--")

        pl.axis("tight")
        pl.show()

    def plot_contour(X1_train, X2_train, clf):
        pl.plot(X1_train[:, 0], X1_train[:, 1], "ro")
        pl.plot(X2_train[:, 0], X2_train[:, 1], "bo")
        pl.scatter(clf.sv[:, 0], clf.sv[:, 1], s=100, c="g")

        X1, X2 = np.meshgrid(np.linspace(-6, 6, 50), np.linspace(-6, 6, 50))
        X = np.array([[x1, x2] for x1, x2 in zip(np.ravel(X1), np.ravel(X2))])
        Z = clf.project(X).reshape(X1.shape)
        pl.contour(X1, X2, Z, [0.0], colors='k', linewidths=1, origin='lower')
        pl.contour(X1, X2, Z + 1, [0.0], colors='grey', linewidths=1, origin='lower')
        pl.contour(X1, X2, Z - 1, [0.0], colors='grey', linewidths=1, origin='lower')

        pl.axis("tight")
        pl.show()

    def test_linear():
        X1, y1, X2, y2 = gen_lin_separable_data()
        X_train, y_train = split_train(X1, y1, X2, y2)
        X_test, y_test = split_test(X1, y1, X2, y2)

        clf = SVM()
        clf.fit(X_train, y_train)

        y_predict = clf.predict(X_test)
        correct = np.sum(y_predict == y_test)
        print("%d out of %d predictions correct" % (correct, len(y_predict)))

        plot_margin(X_train[y_train == 1], X_train[y_train == -1], clf)

    def test_non_linear():
        X1, y1, X2, y2 = gen_non_lin_separable_data()
        X_train, y_train = split_train(X1, y1, X2, y2)
        X_test, y_test = split_test(X1, y1, X2, y2)

        clf = SVM(gaussian_kernel)
        clf.fit(X_train, y_train)

        y_predict = clf.predict(X_test)
        correct = np.sum(y_predict == y_test)
        print("%d out of %d predictions correct" % (correct, len(y_predict)))

        plot_contour(X_train[y_train == 1], X_train[y_train == -1], clf)

    def test_soft():
        X1, y1, X2, y2 = gen_lin_separable_overlap_data()
        X_train, y_train = split_train(X1, y1, X2, y2)
        X_test, y_test = split_test(X1, y1, X2, y2)

        clf = SVM(C=0.1)
        clf.fit(X_train, y_train)

        y_predict = clf.predict(X_test)
        correct = np.sum(y_predict == y_test)
        print("%d out of %d predictions correct" % (correct, len(y_predict)))

        plot_contour(X_train[y_train == 1], X_train[y_train == -1], clf)

    test_soft()
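Several comments below ask how to run this on data loaded from a file. A minimal usage sketch, relying on the definitions above and assuming a hypothetical comma-separated train.csv whose columns are features followed by a single +1/-1 label (the file name and layout are illustrative):

# Usage sketch (assumption: train.csv = feature columns + trailing +1/-1 label)
data = np.loadtxt("train.csv", delimiter=",")
X, y = data[:, :-1], data[:, -1].astype(float)  # cvxopt needs float labels

clf = SVM(kernel=gaussian_kernel, C=1.0)  # kernel and C chosen arbitrarily here
clf.fit(X, y)
print(clf.predict(X))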
@ogrisel commented Sep 19, 2010

Interesting, how does it compare in practice with the SMO optimizer of libsvm? And the coordinate descent optimizer of liblinear?

@mblondel (Owner) commented Sep 21, 2010

The problem with using an off-the-shelf QP solver is that the matrix P is n_samples x n_samples and needs to be stored in memory. So this implementation is more a toy implementation than anything else :)
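To put numbers on that, a quick back-of-the-envelope sketch (my own illustration, not from the original comment): a dense float64 Gram matrix takes 8 * n^2 bytes.

# Memory needed to store the dense n_samples x n_samples matrix P (float64)
for n in (1000, 10000, 100000):
    print("n_samples=%6d -> %8.2f GiB" % (n, n * n * 8 / 2.0**30))
# 1000 -> 0.01 GiB, 10000 -> 0.75 GiB, 100000 -> 74.51 GiB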

@rbarve commented Aug 3, 2013

For the intercept b, shouldn't only the margin support vectors (those with multipliers strictly less than C) be used for the average? In the code below, the loop includes all support vectors (including those at C), since 'a' contains all the nonzero multipliers. Am I missing something?

    # Intercept
    self.b = 0
    for n in range(len(self.a)):
        self.b += self.sv_y[n]
        self.b -= np.sum(self.a * self.sv_y * K[ind[n], sv])
    self.b /= len(self.a)
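For reference, a sketch of the variant @rbarve describes, averaging b over margin support vectors only (multipliers strictly below C). This is not a drop-in patch; the argument names mirror the locals in fit(), and the tolerance is an assumption:

# Sketch: intercept averaged over margin SVs only (0 < a_i < C)
def intercept_margin_svs(a, sv_y, K, ind, sv, C, tol=1e-5):
    # a, sv_y: multipliers and labels of the SVs; K: full Gram matrix;
    # ind: original indices of the SVs; sv: boolean SV mask (as in fit())
    margin = a < C - tol if C is not None else np.ones(len(a), dtype=bool)
    b = 0.0
    for n in np.arange(len(a))[margin]:
        b += sv_y[n]
        b -= np.sum(a * sv_y * K[ind[n], sv])
    return b / max(margin.sum(), 1)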
@amit2510 commented Jun 17, 2014

Thanks a lot for this useful Python code. I am also looking for a string kernel in Python... anyone have an idea?

@enrif commented Jan 5, 2015

How can I run the test with data from a CSV file (train.csv)?
Thank you.

@sujithms921 commented Dec 1, 2015

How can I run the test with data from a CSV file (train.csv)?

@zeromtmu commented Mar 17, 2016

How can I use it?

@liangweigang commented Jul 11, 2016

Just a toy.. It's a little bit funny. :D

@banusara commented Oct 7, 2016

It's really useful, but I want a one-class SVM.
Please provide code for a one-class SVM algorithm in Python.
How do I tune its parameters?

@matt123miller commented Feb 7, 2017

Really interesting, thanks for something to play with!

@likejazz commented Feb 9, 2017

Great code that is very helpful. Thanks.

@pedrotnascimento commented Apr 16, 2017

Hi, can you provide the references you used to implement this code?

@InfamousMeGa commented Apr 21, 2017

Hi, the cvxopt QP solver is much slower than I expected when I run it on my training data. Is there any way to improve the runtime?

@poesie11 commented Apr 21, 2017

I need code implementing this for data sets in Java.

@xitizme78 commented Jul 16, 2017

TypeError: 'A' must be a 'd' matrix with 16 columns
What am I doing wrong?

@Aaf07 commented Jul 31, 2017

Can you please write up code for SVM classification of images too?

@ugurite commented Apr 2, 2018

@xitizme78: that error usually means the training labels you're passing are of type int. They should be of type float.
Do: y = y.astype(float)
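A minimal sketch of that fix in context; X and the label values here are placeholders, not from the thread:

y = np.array([1, -1, 1, -1])   # int dtype would trip cvxopt's 'd' matrix check
clf = SVM()
clf.fit(X, y.astype(float))    # X: (n_samples, n_features) float array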

@oottoohh commented Apr 10, 2018

How can I run a test on my own data?

@realSaeedHassani commented Oct 23, 2018

Thanks man!...

@dmeoli commented Mar 3, 2020

How can I modify this code to implement an SVR?

@guruprasaad123 commented Mar 4, 2020

Here's the theoretical explanation of SVR, which you can find here.

@dmeoli commented Mar 5, 2020

Thx!

@dmeoli commented Mar 9, 2020

If anyone is interested in a possible implementation of an SVR following the PDF linked by @guruprasaad123, the code is here: https://github.com/dmeoli/optiml/blob/master/optiml/ml/svm/_base.py
