# Mathieu Blondel, September 2010
# License: BSD 3 clause

import numpy as np
from numpy import linalg
import cvxopt
import cvxopt.solvers

def linear_kernel(x1, x2):
    return np.dot(x1, x2)

def polynomial_kernel(x, y, p=3):
    return (1 + np.dot(x, y)) ** p

def gaussian_kernel(x, y, sigma=5.0):
    return np.exp(-linalg.norm(x-y)**2 / (2 * (sigma ** 2)))

class SVM(object):

    def __init__(self, kernel=linear_kernel, C=None):
        self.kernel = kernel
        self.C = C
        if self.C is not None: self.C = float(self.C)

    def fit(self, X, y):
        n_samples, n_features = X.shape

        # Gram matrix
        K = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                K[i,j] = self.kernel(X[i], X[j])

        P = cvxopt.matrix(np.outer(y,y) * K)
        q = cvxopt.matrix(np.ones(n_samples) * -1)
        A = cvxopt.matrix(y, (1,n_samples))
        b = cvxopt.matrix(0.0)

        if self.C is None:
            G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))
            h = cvxopt.matrix(np.zeros(n_samples))
        else:
            tmp1 = np.diag(np.ones(n_samples) * -1)
            tmp2 = np.identity(n_samples)
            G = cvxopt.matrix(np.vstack((tmp1, tmp2)))
            tmp1 = np.zeros(n_samples)
            tmp2 = np.ones(n_samples) * self.C
            h = cvxopt.matrix(np.hstack((tmp1, tmp2)))

        # solve QP problem
        solution = cvxopt.solvers.qp(P, q, G, h, A, b)

        # Lagrange multipliers
        a = np.ravel(solution['x'])

        # Support vectors have non zero lagrange multipliers
        sv = a > 1e-5
        ind = np.arange(len(a))[sv]
        self.a = a[sv]
        self.sv = X[sv]
        self.sv_y = y[sv]
        print "%d support vectors out of %d points" % (len(self.a), n_samples)

        # Intercept
        self.b = 0
        for n in range(len(self.a)):
            self.b += self.sv_y[n]
            self.b -= np.sum(self.a * self.sv_y * K[ind[n],sv])
        self.b /= len(self.a)

        # Weight vector
        if self.kernel == linear_kernel:
            self.w = np.zeros(n_features)
            for n in range(len(self.a)):
                self.w += self.a[n] * self.sv_y[n] * self.sv[n]
        else:
            self.w = None

    def project(self, X):
        if self.w is not None:
            return np.dot(X, self.w) + self.b
        else:
            y_predict = np.zeros(len(X))
            for i in range(len(X)):
                s = 0
                for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):
                    s += a * sv_y * self.kernel(X[i], sv)
                y_predict[i] = s
            return y_predict + self.b

    def predict(self, X):
        return np.sign(self.project(X))

if __name__ == "__main__":
    import pylab as pl

    def gen_lin_separable_data():
        # generate training data in the 2-d case
        mean1 = np.array([0, 2])
        mean2 = np.array([2, 0])
        cov = np.array([[0.8, 0.6], [0.6, 0.8]])
        X1 = np.random.multivariate_normal(mean1, cov, 100)
        y1 = np.ones(len(X1))
        X2 = np.random.multivariate_normal(mean2, cov, 100)
        y2 = np.ones(len(X2)) * -1
        return X1, y1, X2, y2

    def gen_non_lin_separable_data():
        mean1 = [-1, 2]
        mean2 = [1, -1]
        mean3 = [4, -4]
        mean4 = [-4, 4]
        cov = [[1.0,0.8], [0.8, 1.0]]
        X1 = np.random.multivariate_normal(mean1, cov, 50)
        X1 = np.vstack((X1, np.random.multivariate_normal(mean3, cov, 50)))
        y1 = np.ones(len(X1))
        X2 = np.random.multivariate_normal(mean2, cov, 50)
        X2 = np.vstack((X2, np.random.multivariate_normal(mean4, cov, 50)))
        y2 = np.ones(len(X2)) * -1
        return X1, y1, X2, y2

    def gen_lin_separable_overlap_data():
        # generate training data in the 2-d case
        mean1 = np.array([0, 2])
        mean2 = np.array([2, 0])
        cov = np.array([[1.5, 1.0], [1.0, 1.5]])
        X1 = np.random.multivariate_normal(mean1, cov, 100)
        y1 = np.ones(len(X1))
        X2 = np.random.multivariate_normal(mean2, cov, 100)
        y2 = np.ones(len(X2)) * -1
        return X1, y1, X2, y2

    def split_train(X1, y1, X2, y2):
        X1_train = X1[:90]
        y1_train = y1[:90]
        X2_train = X2[:90]
        y2_train = y2[:90]
        X_train = np.vstack((X1_train, X2_train))
        y_train = np.hstack((y1_train, y2_train))
        return X_train, y_train

    def split_test(X1, y1, X2, y2):
        X1_test = X1[90:]
        y1_test = y1[90:]
        X2_test = X2[90:]
        y2_test = y2[90:]
        X_test = np.vstack((X1_test, X2_test))
        y_test = np.hstack((y1_test, y2_test))
        return X_test, y_test

    def plot_margin(X1_train, X2_train, clf):
        def f(x, w, b, c=0):
            # given x, return y such that [x,y] is on the line
            # w.x + b = c
            return (-w[0] * x - b + c) / w[1]

        pl.plot(X1_train[:,0], X1_train[:,1], "ro")
        pl.plot(X2_train[:,0], X2_train[:,1], "bo")
        pl.scatter(clf.sv[:,0], clf.sv[:,1], s=100, c="g")

        # w.x + b = 0
        a0 = -4; a1 = f(a0, clf.w, clf.b)
        b0 = 4; b1 = f(b0, clf.w, clf.b)
        pl.plot([a0,b0], [a1,b1], "k")

        # w.x + b = 1
        a0 = -4; a1 = f(a0, clf.w, clf.b, 1)
        b0 = 4; b1 = f(b0, clf.w, clf.b, 1)
        pl.plot([a0,b0], [a1,b1], "k--")

        # w.x + b = -1
        a0 = -4; a1 = f(a0, clf.w, clf.b, -1)
        b0 = 4; b1 = f(b0, clf.w, clf.b, -1)
        pl.plot([a0,b0], [a1,b1], "k--")

        pl.axis("tight")
        pl.show()

    def plot_contour(X1_train, X2_train, clf):
        pl.plot(X1_train[:,0], X1_train[:,1], "ro")
        pl.plot(X2_train[:,0], X2_train[:,1], "bo")
        pl.scatter(clf.sv[:,0], clf.sv[:,1], s=100, c="g")
        X1, X2 = np.meshgrid(np.linspace(-6,6,50), np.linspace(-6,6,50))
        X = np.array([[x1, x2] for x1, x2 in zip(np.ravel(X1), np.ravel(X2))])
        Z = clf.project(X).reshape(X1.shape)
        pl.contour(X1, X2, Z, [0.0], colors='k', linewidths=1, origin='lower')
        pl.contour(X1, X2, Z + 1, [0.0], colors='grey', linewidths=1, origin='lower')
        pl.contour(X1, X2, Z - 1, [0.0], colors='grey', linewidths=1, origin='lower')
        pl.axis("tight")
        pl.show()

    def test_linear():
        X1, y1, X2, y2 = gen_lin_separable_data()
        X_train, y_train = split_train(X1, y1, X2, y2)
        X_test, y_test = split_test(X1, y1, X2, y2)

        clf = SVM()
        clf.fit(X_train, y_train)

        y_predict = clf.predict(X_test)
        correct = np.sum(y_predict == y_test)
        print "%d out of %d predictions correct" % (correct, len(y_predict))

        plot_margin(X_train[y_train==1], X_train[y_train==-1], clf)

    def test_non_linear():
        X1, y1, X2, y2 = gen_non_lin_separable_data()
        X_train, y_train = split_train(X1, y1, X2, y2)
        X_test, y_test = split_test(X1, y1, X2, y2)

        clf = SVM(gaussian_kernel)
        clf.fit(X_train, y_train)

        y_predict = clf.predict(X_test)
        correct = np.sum(y_predict == y_test)
        print "%d out of %d predictions correct" % (correct, len(y_predict))

        plot_contour(X_train[y_train==1], X_train[y_train==-1], clf)

    def test_soft():
        X1, y1, X2, y2 = gen_lin_separable_overlap_data()
        X_train, y_train = split_train(X1, y1, X2, y2)
        X_test, y_test = split_test(X1, y1, X2, y2)

        clf = SVM(C=0.1)
        clf.fit(X_train, y_train)

        y_predict = clf.predict(X_test)
        correct = np.sum(y_predict == y_test)
        print "%d out of %d predictions correct" % (correct, len(y_predict))

        plot_contour(X_train[y_train==1], X_train[y_train==-1], clf)

    test_soft()
The problem with using an off-the-shelf QP solver is that the matrix P is n_samples x n_samples and needs to be stored in memory, so this implementation is more of a toy implementation than anything else :)
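For a rough sense of scale (my arithmetic, not part of the original comment), the dense P matrix alone grows quadratically with the number of samples:

n_samples = 20000
print(n_samples ** 2 * 8 / 1e9)   # float64 entries: about 3.2 GB for P alone, before the solver's own copies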
For the intercept b, shouldn't only the margin support vectors (those with multipliers strictly less than C) be used for the average? In the code below, the loop runs over all support vectors (including those whose multiplier may equal C), since self.a contains all non-zero multipliers. Am I missing something?

# Intercept
self.b = 0
for n in range(len(self.a)):
    self.b += self.sv_y[n]
    self.b -= np.sum(self.a * self.sv_y * K[ind[n],sv])
self.b /= len(self.a)
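For what it's worth, one way to write the variant this question describes is to average only over multipliers strictly below C. This is just a sketch against the variables inside fit() (the 1e-5 tolerance is an arbitrary choice of mine), not something from the original gist:

# Average b only over margin support vectors (0 < a_i < C)
if self.C is None:
    margin = np.ones(len(self.a), dtype=bool)
else:
    margin = self.a < self.C - 1e-5
self.b = 0
for n in np.arange(len(self.a))[margin]:
    self.b += self.sv_y[n]
    self.b -= np.sum(self.a * self.sv_y * K[ind[n], sv])
self.b /= margin.sum()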
Thanks a lot for this useful Python code. I am also looking for a string kernel in Python... anyone have an idea?
How can I do the test with the data in a CSV file (train.csv)?
Thank you.
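A minimal sketch of one way to do this, assuming train.csv has no header row, feature columns first, and a +1/-1 label in the last column (that layout is an assumption, adjust it to your file):

import numpy as np

data = np.loadtxt("train.csv", delimiter=",")
X, y = data[:, :-1], data[:, -1].astype(float)   # cvxopt needs float labels

clf = SVM()              # or SVM(gaussian_kernel), SVM(C=0.1), ...
clf.fit(X, y)
print(clf.predict(X[:5]))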
How can I use it?
Just a toy... it's a little bit funny. :D
It's really useful, but I want a one-class SVM. Please provide code for a one-class SVM algorithm in Python.
How do I tune its parameters?
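The gist itself has no tuning code, but a simple hold-out grid search over C and the RBF width is enough to start with. A sketch, assuming you have already set aside a validation split X_val/y_val and that the candidate values below are only placeholders:

best = None
for C in [0.1, 1.0, 10.0]:
    for sigma in [1.0, 5.0, 10.0]:
        clf = SVM(kernel=lambda a, b, s=sigma: gaussian_kernel(a, b, s), C=C)
        clf.fit(X_train, y_train)
        acc = np.mean(clf.predict(X_val) == y_val)
        if best is None or acc > best[0]:
            best = (acc, C, sigma)
# best holds (validation accuracy, C, sigma) of the best combination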
Really interesting, thanks for something to play with!
Great code that is very helpful. Thanks.
Hi, can you share the reference you used to implement this code?
Hi, the cvxopt QP solver is much slower than I expected when I run it on my training data. Is there any way to improve the runtime?
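Not an answer from the author, but one thing worth profiling before blaming the solver: the nested Python loops that build the Gram matrix in fit() are often the real bottleneck. A sketch of a vectorized replacement for the Gaussian kernel (same sigma default as the gist; requires SciPy):

import numpy as np
from scipy.spatial.distance import cdist

def gaussian_gram(X, sigma=5.0):
    sq_dists = cdist(X, X, "sqeuclidean")        # all pairwise squared distances at once
    return np.exp(-sq_dists / (2 * sigma ** 2))

Silencing the solver's progress output with cvxopt.solvers.options['show_progress'] = False keeps the console quieter but does not change the cost of the solve itself.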
I need code to implement this on data sets in Java.
TypeError: 'A' must be a 'd' matrix with 16 columns.
What am I doing wrong?
Can you please write up code for SVM classification of images too?
@xitizme78: that error usually means the training labels you're passing in are of type int. They should be of type float.
Do: y = y.astype(float)
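In context, that would look something like this (a sketch; y_train stands in for whatever integer labels you loaded):

y_train = y_train.astype(float)   # cvxopt.matrix(y, (1, n_samples)) needs double-precision entries
clf = SVM(C=0.1)
clf.fit(X_train, y_train)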
How can I run a test on my own data?
Thanks man!...
The corresponding blog post is archived here
Thank you!
How can I modify this code to implement an SVR?
Here's the theoretical explanation of SVR, which you can find here.
Thx!
If anyone is interested in a possible implementation of an SVR following the PDF linked by @guruprasaad123, the code is here: https://github.com/dmeoli/optiml/blob/master/optiml/ml/svm/_base.py
@mblondel I am really late to the party, but I feel like line 75 is incorrect. Shouldn't it be self.w[n]? Do correct me if I am wrong on this.
Interesting, how does it compare in practice with the SMO optimizer of libsvm? And the coordinate descent optimizer of liblinear?