@singhay
Created February 28, 2016 09:17
A simplified Support Vector Machine (SVM) Sequential Minimal Optimization (SMO) algorithm
# Inspired by http://cs229.stanford.edu/materials/smo.pdf
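# The classifier learned below uses a linear kernel: f(x) = sum_i alpha_i * y_i * <x_i, x> + bias,
# with labels y_i in {-1, +1}. The helper E() further down evaluates exactly this
# decision value for a single training example.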
import numpy as np
import pandas as pd
import math
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
def SMO(data, classLabel, constant, tolerance, maxpasses):
    """Simplified SMO with a linear kernel (see the cs229 notes cited above).

    Expects binary labels in {-1, +1}. Returns the learned Lagrange
    multipliers ``alpha`` and the bias term.
    """
    dataMatrix = np.array(data, dtype=float)
    label = np.array(classLabel, dtype=float)
    m, n = np.shape(dataMatrix)
    alpha = np.zeros(m)
    bias = 0.0
    nan_count = 0
    passes = 0
    while passes < maxpasses:
        num_alphas_changed = 0
        for i in range(m):
            # Error on example i: E_i = f(x_i) - y_i
            Ei = E(dataMatrix, label, alpha, bias, i) - label[i]
            # Only optimize pairs where alpha[i] violates the KKT conditions
            if ((label[i] * Ei < -tolerance and alpha[i] < constant) or
                    (label[i] * Ei > tolerance and alpha[i] > 0)):
                # Pick a second index j != i uniformly at random
                while True:
                    j = int(np.random.uniform(0, m))
                    if j != i:
                        break
                Ej = E(dataMatrix, label, alpha, bias, j) - label[j]
                alphaI_old = alpha[i]
                alphaJ_old = alpha[j]
                # Bounds L and H that keep alpha[j] inside the box [0, C]
                if label[i] != label[j]:
                    L = max(0, alpha[j] - alpha[i])
                    H = min(constant, constant + alpha[j] - alpha[i])
                else:
                    L = max(0, alpha[j] + alpha[i] - constant)
                    H = min(constant, alpha[j] + alpha[i])
                if L == H:
                    continue
                # eta = 2*K(i,j) - K(i,i) - K(j,j); must be negative for progress
                eta = (2.0 * np.dot(dataMatrix[i], dataMatrix[j])
                       - np.dot(dataMatrix[i], dataMatrix[i])
                       - np.dot(dataMatrix[j], dataMatrix[j]))
                if eta >= 0:
                    continue
                # Update alpha[j] along the constraint and clip it to [L, H]
                alpha[j] -= label[j] * (Ei - Ej) / eta
                alpha[j] = min(max(alpha[j], L), H)
                if abs(alpha[j] - alphaJ_old) < 1e-5:
                    continue
                # Move alpha[i] by the same amount in the opposite direction
                alpha[i] += label[i] * label[j] * (alphaJ_old - alpha[j])
                # Updating bias
                bias1 = (bias - Ei
                         - label[i] * (alpha[i] - alphaI_old) * np.dot(dataMatrix[i], dataMatrix[i])
                         - label[j] * (alpha[j] - alphaJ_old) * np.dot(dataMatrix[i], dataMatrix[j]))
                bias2 = (bias - Ej
                         - label[i] * (alpha[i] - alphaI_old) * np.dot(dataMatrix[i], dataMatrix[j])
                         - label[j] * (alpha[j] - alphaJ_old) * np.dot(dataMatrix[j], dataMatrix[j]))
                if 0 < alpha[i] < constant:
                    bias = bias1
                elif 0 < alpha[j] < constant:
                    bias = bias2
                else:
                    bias = (bias1 + bias2) / 2.0
                if math.isnan(bias):
                    nan_count += 1
                    print("nan bias count:", nan_count)
                num_alphas_changed += 1
        # Count a pass only when no alpha changed; otherwise reset the counter
        if num_alphas_changed == 0:
            passes += 1
        else:
            passes = 0
    print('passes:', passes)
    print(alpha)
    print('Bias:', bias)
    return alpha, bias
def E(dataMatrix, label, alpha, bias, i):
    # Decision value f(x_i) = sum_k alpha_k * y_k * <x_k, x_i> + bias (linear kernel)
    return float(np.dot(alpha * label, np.dot(dataMatrix, dataMatrix[i])) + bias)
# Scale raw 8-bit pixel values (0-255) into the range [0, 1]
def normalizeData(data):
    return data / 255.0
def main():
    dataFrame = pd.read_csv('a2_datasets/digits/train.csv')
    train = dataFrame.iloc[1:6517, 1:]
    train_label = dataFrame.iloc[1:6517, 0]
    dataFrame_test = pd.read_csv('a2_datasets/digits/test.csv')
    test = dataFrame_test.iloc[1:1629, 1:]
    test_label = dataFrame_test.iloc[1:1629, 0]
    # Train the hand-rolled SMO (note: it assumes binary {-1, +1} labels)
    SMO(train, train_label, 1.0, 0.8, 3)
    # Compare against scikit-learn's LinearSVC with matching hyperparameters
    lSMO = LinearSVC(C=1.0, tol=0.8, max_iter=3)
    lSMO.fit(train, train_label)
    print('Accuracy from LinearSVM: {:.2f}%'.format(lSMO.score(test, test_label) * 100))
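# --- Illustrative usage sketch (not part of the original gist) ---
# A minimal example of how the (alpha, bias) pair returned by SMO() above
# could be turned into a linear decision function and scored. The toy data
# and the demo() name are hypothetical; the sketch only assumes that SMO()
# returns (alpha, bias) as in this version and that labels live in {-1, +1}.
def demo():
    rng = np.random.RandomState(0)
    X = np.vstack([rng.randn(20, 2) + 2.0,   # toy positive class
                   rng.randn(20, 2) - 2.0])  # toy negative class
    y = np.hstack([np.ones(20), -np.ones(20)])
    alpha, bias = SMO(X, y, constant=1.0, tolerance=0.001, maxpasses=10)
    # With a linear kernel the weight vector is w = sum_i alpha_i * y_i * x_i
    w = np.dot(alpha * y, X)
    predictions = np.sign(np.dot(X, w) + bias)
    print('Training accuracy: {:.2f}%'.format(accuracy_score(y, predictions) * 100))
# Call demo() instead of main() below to try the sketch on the toy data.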
if __name__ == "__main__":
    main()