Instantly share code, notes, and snippets.

# glamp/svmflag.py Last active Dec 1, 2017

What would you like to do?
Plotting SVM predictions using matplotlib and sklearn
 import numpy as np import pylab as pl import pandas as pd from sklearn import svm from sklearn import linear_model from sklearn import tree from sklearn.metrics import confusion_matrix x_min, x_max = 0, 15 y_min, y_max = 0, 10 step = .1 # to plot the boundary, we're going to create a matrix of every possible point # then label each point as a wolf or cow using our classifier xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step)) df = pd.DataFrame(data={'x': xx.ravel(), 'y': yy.ravel()}) df['color_gauge'] = (df.x-7.5)**2 + (df.y-5)**2 df['color'] = df.color_gauge.apply(lambda x: "red" if x <= 15 else "green") df['color_as_int'] = df.color.apply(lambda x: 0 if x=="red" else 1) print "Points on flag:" print df.groupby('color').size() print figure = 1 # plot a figure for the entire dataset for color in df.color.unique(): idx = df.color==color pl.subplot(2, 2, figure) pl.scatter(df[idx].x, df[idx].y, color=color) pl.title('Actual') train_idx = df.x < 10 train = df[train_idx] test = df[-train_idx] print "Training Set Size: %d" % len(train) print "Test Set Size: %d" % len(test) # train using the x and y position coordiantes cols = ["x", "y"] clfs = { "SVM": svm.SVC(degree=0.5), "Logistic" : linear_model.LogisticRegression(), "Decision Tree": tree.DecisionTreeClassifier() } # racehorse different classifiers and plot the results for clf_name, clf in clfs.iteritems(): figure += 1 # train the classifier clf.fit(train[cols], train.color_as_int) # get the predicted values from the test set test['predicted_color_as_int'] = clf.predict(test[cols]) test['pred_color'] = test.predicted_color_as_int.apply(lambda x: "red" if x==0 else "green") # create a new subplot on the plot pl.subplot(2, 2, figure) # plot each predicted color for color in test.pred_color.unique(): # plot only rows where pred_color is equal to color idx = test.pred_color==color pl.scatter(test[idx].x, test[idx].y, color=color) # plot the training set as well for color in train.color.unique(): idx = train.color==color pl.scatter(train[idx].x, train[idx].y, color=color) # add a dotted line to show the boundary between the training and test set # (everything to the right of the line is in the test set) #this plots a vertical line train_line_y = np.linspace(y_min, y_max) #evenly spaced array from 0 to 10 train_line_x = np.repeat(10, len(train_line_y)) #repeat 10 (threshold for traininset) n times # add a black, dotted line to the subplot pl.plot(train_line_x, train_line_y, 'k--', color="black") pl.title(clf_name) print "Confusion Matrix for %s:" % clf_name print confusion_matrix(test.color, test.pred_color) pl.show()

### daoqinzi commented Aug 20, 2014

 Thanks for your code and I will get a better understanding.

 nice