Skip to content
Create a gist now

Instantly share code, notes, and snippets.

@glamp /
Last active

Embed URL


Subversion checkout URL

You can clone with
Download ZIP
Plotting SVM predictions using matplotlib and sklearn
import numpy as np
import pylab as pl
import pandas as pd
from sklearn import svm
from sklearn import linear_model
from sklearn import tree
from sklearn.metrics import confusion_matrix
# Demo script: every point of a 15 x 10 grid is labelled "red" when it lies
# inside a circle centred on (7.5, 5) and "green" otherwise.  Three classifiers
# are trained on the left part of the grid (x < train_boundary_x) and their
# predictions for the right part are drawn next to the true labelling.
# NOTE: nothing below calls pl.show() / pl.savefig(); do so afterwards to
# actually see the figure.
x_min, x_max = 0, 15
y_min, y_max = 0, 10
step = .1
# x coordinate separating the training set (left) from the test set (right)
train_boundary_x = 10


def _scatter_by_color(frame, color_column):
    """Scatter the x/y points of *frame* on the current axes.

    One ``pl.scatter`` call is issued per distinct value found in
    ``frame[color_column]``; that value is used as the marker color.
    """
    for color in frame[color_column].unique():
        subset = frame[frame[color_column] == color]
        pl.scatter(subset.x, subset.y, color=color)


# build a frame holding every grid point so each one can be labelled
xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
df = pd.DataFrame(data={'x': xx.ravel(), 'y': yy.ravel()})

# squared distance from the centre of the circle; <= 15 means "inside"
df['color_gauge'] = (df.x - 7.5) ** 2 + (df.y - 5) ** 2
df['color'] = np.where(df.color_gauge <= 15, "red", "green")
# numeric target for the classifiers: 0 == red, 1 == green
df['color_as_int'] = (df.color != "red").astype(int)

print("Points on flag:")
print(df.groupby('color').size())

figure = 1
# first subplot: the true labelling of the entire dataset
pl.subplot(2, 2, figure)
_scatter_by_color(df, 'color')

train_idx = df.x < train_boundary_x
train = df[train_idx]
# `~` is the boolean NOT for a mask (unary minus is rejected on boolean data);
# copy so that the prediction columns added below do not write into a view
test = df[~train_idx].copy()
print("Training Set Size: %d" % len(train))
print("Test Set Size: %d" % len(test))

# train using the x and y position coordinates
cols = ["x", "y"]
clfs = {
    # `degree` only matters for the polynomial kernel and must be an integer,
    # so the former degree=0.5 is dropped; the default RBF kernel ignores it
    "SVM": svm.SVC(),
    "Logistic": linear_model.LogisticRegression(),
    "Decision Tree": tree.DecisionTreeClassifier(),
}

# racehorse different classifiers and plot the results (subplots 2..4)
for figure, (clf_name, clf) in enumerate(clfs.items(), start=2):
    # train the classifier
    clf.fit(train[cols], train.color_as_int)

    # get the predicted values from the test set
    test['predicted_color_as_int'] = clf.predict(test[cols])
    test['pred_color'] = np.where(test.predicted_color_as_int == 0, "red", "green")

    # create a new subplot on the plot
    pl.subplot(2, 2, figure)
    # plot each predicted color of the test set ...
    _scatter_by_color(test, 'pred_color')
    # ... and the training set (true colors) as well
    _scatter_by_color(train, 'color')

    # add a dashed vertical line to show the boundary between the training and
    # test set (everything to the right of the line is in the test set)
    train_line_y = np.linspace(y_min, y_max)  # evenly spaced from y_min to y_max
    train_line_x = np.repeat(train_boundary_x, len(train_line_y))
    # 'k--' already means "black, dashed"; no separate color argument needed
    pl.plot(train_line_x, train_line_y, 'k--')

    print("Confusion Matrix for %s:" % clf_name)
    print(confusion_matrix(test.color, test.pred_color))

Thanks for your code and I will get a better understanding.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.