20160215 Decision Boundaries
### Prepare Python
%pylab inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
import statsmodels.api as sm
import scipy.stats as stats
import neurolab as nl
from sklearn.datasets import make_classification
from sklearn import neighbors, tree, svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
### Helper functions for plotting
# A function to plot observations on scatterplot
def plotcases(ax):
    ax.scatter(xdf['x1'], xdf['x2'], c=xdf['y'], cmap=cm.coolwarm, alpha=0.6, s=20, lw=0.4)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.tick_params(axis='both', which='both', left=False, right=False, top=False, bottom=False,
                   labelleft=False, labelright=False, labeltop=False, labelbottom=False)
# a function to draw decision boundary and colour the regions
def plotboundary(ax, Z):
    ax.pcolormesh(xx, yy, Z, cmap=cm.coolwarm, alpha=0.1)
    ax.contour(xx, yy, Z, [0.5], linewidths=0.75, colors='k')
### Generate train data
X, y = make_classification(n_samples=100, n_features=2, n_informative=2, n_redundant=0, n_repeated=0,
                           n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.02, class_sep=0.5,
                           hypercube=True, shift=0.0, scale=0.5, shuffle=True, random_state=5)
xdf = pd.DataFrame(X, columns=['x1','x2'])
xdf['y'] = y
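# NB: with class_sep=0.5, flip_y=0.02 and two clusters per class, the classes overlap and ~2% of
# labels are flipped at random, so no decision boundary can classify the training points perfectly.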
### create plot canvas with a 7x4 grid of subplots
fig = plt.figure(figsize=(12,16), dpi=1600)
plt.subplots_adjust(hspace=.5)
nrows = 7
ncols = 4
gridsize = (nrows, ncols)
### 1. plot the problem ###
ax0 = plt.subplot2grid(gridsize,[0,0])
plotcases(ax0)
ax0.title.set_text("Problem")
# take boundaries from first plot and define mesh of points for plotting decision spaces
x_min, x_max = plt.xlim()
y_min, y_max = plt.ylim()
nx, ny = 100, 100 # number of grid points along each axis of the mesh
xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx),
np.linspace(y_min, y_max, ny))
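# Each classifier below is scored over this mesh; its class-1 probability (or predicted class) is
# reshaped back to the grid so plotboundary can shade the regions and draw the p=0.5 contour.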
############# kNN #################
### 2. kNN with k=5
knn5 = neighbors.KNeighborsClassifier(n_neighbors=5)
knn5.fit(xdf[['x1','x2']], xdf['y'])
Z = knn5.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [1,0])
ax.title.set_text("kNN, k=5")
plotboundary(ax, Z)
plotcases(ax)
### 3. kNN with k=15
knn15 = neighbors.KNeighborsClassifier(n_neighbors=15)
knn15.fit(xdf[['x1','x2']], xdf['y'])
Z = knn15.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize,[1,1])
ax.title.set_text("kNN, k=15")
plotboundary(ax, Z)
plotcases(ax)
########### logistic regression ##########
### 4. Logistic regression - simple linear
formula = ('y ~ x1 + x2')
LRm = sm.GLM.from_formula(formula=formula, data=xdf, family=sm.families.Binomial()).fit()
XX = pd.DataFrame(np.vstack([xx.ravel(), yy.ravel()]).T)
XX.columns = xdf.columns[0:2]
Z = LRm.predict(XX)
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [2,0])
ax.title.set_text("Logistic Regression\n simple")
plotboundary(ax, Z)
plotcases(ax)
### 5. Logistic regression - with basic polynomials
formula = ('y ~ x1 + x2 + I(x1**2) + I(x2**2)')
LRm = sm.GLM.from_formula(formula=formula, data=xdf, family=sm.families.Binomial()).fit()
Z = LRm.predict(XX)
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [2,1])
ax.title.set_text("Logistic Regression\n basic polynomials")
plotboundary(ax, Z)
plotcases(ax)
### 6. Logistic regression - with polynomials & interactions
formula = ('y ~ x1 + x2 + x1*x2 + I(x1**2) + I(x2**2) + x1*I(x1**2) + x2*I(x2**2)\
+ I(x1**3) + I(x2**3) + x1*I(x1**3) + x2*I(x2**3)')
LRm = sm.GLM.from_formula(formula=formula, data=xdf, family=sm.families.Binomial()).fit()
Z = LRm.predict(XX)
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [2,2])
ax.title.set_text("Logistic Regression\n polynomials & interactions")
plotboundary(ax, Z)
plotcases(ax)
### 7. Logistic regression - with polynomials & interactions, regularised
# alpha is the penalty weight in statsmodels' fit_regularized: smaller alpha = weaker regularisation.
formula = ('y ~ x1 + x2 + x1*x2 + I(x1**2) + I(x2**2) + x1*I(x1**2) + x2*I(x2**2)\
+ I(x1**3) + I(x2**3) + x1*I(x1**3) + x2*I(x2**3)')
LRm = sm.Logit.from_formula(formula=formula, data=xdf).fit_regularized(alpha=0.5, maxiter = 200)
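# (The penalty shrinks many of the polynomial/interaction coefficients towards zero;
#  inspect LRm.params to see which terms survive.)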
Z = LRm.predict(XX)
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [2,3])
ax.title.set_text("Logistic Regression\n polynomials & interactions\n regularised")
plotboundary(ax, Z)
plotcases(ax)
##### TREE METHODS ########
### 8. Decision tree
dtree = tree.DecisionTreeClassifier()
dtree = dtree.fit(xdf[['x1','x2']], xdf['y'])
Z = dtree.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [3,0])
ax.title.set_text("Decision tree")
plotboundary(ax, Z)
plotcases(ax)
### 9. Random forest
rf = RandomForestClassifier()
rf.fit(xdf[['x1','x2']], xdf['y'])
Z = rf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [3,1])
ax.title.set_text("Random forest")
plotboundary(ax, Z)
plotcases(ax)
###### SUPPORT VECTOR MACHINES ###############
## 10. SVM with 4th order polynomials
svc2 = svm.SVC(kernel='poly',degree=4, probability=True)
svc2.fit(xdf[['x1','x2']], xdf['y'])
Z = svc2.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [4,0])
ax.title.set_text("SVM\n4th order polynomials")
plotboundary(ax, Z)
plotcases(ax)
## 11. SVM with radial basis function
svc3 = svm.SVC(kernel='rbf', probability=True)
svc3.fit(xdf[['x1','x2']], xdf['y'])
Z = svc3.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [4,1])
ax.title.set_text("SVM\n Radial basis function")
plotboundary(ax, Z)
plotcases(ax)
####### Bayesian & Probabilistic methods ############
### 12. Gaussian Naive Bayes
gnb = GaussianNB()
gnb.fit(xdf[['x1','x2']], xdf['y'])
Z = gnb.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [5,0])
ax.title.set_text("Gaussian Naive Bayes")
plotboundary(ax, Z)
plotcases(ax)
### 13. Gaussian Bayes, non-naive (joint distribution)
## Assumes multivariate normality
# Specify prior: p(class), taken as the empirical class proportions (the same prior applies everywhere in feature space)
pClass1 = len(xdf[xdf['y']==1]) / float(len(xdf))
pClass2 = len(xdf[xdf['y']==0]) / float(len(xdf))
## Create likelihood distribution: p(datapoint | class)
## For each class, estimate the mean and standard deviation of both variables
# Collect means for both variables, for both classes
Mux1C1 = xdf['x1'][xdf['y']==1].mean()
Mux2C1 = xdf['x2'][xdf['y']==1].mean()
Mux1C2 = xdf['x1'][xdf['y']==0].mean()
Mux2C2 = xdf['x2'][xdf['y']==0].mean()
# Collect standard deviations for both variables, for both classes
# (stats.norm takes the standard deviation as its scale parameter, so .std() is what we want here)
Varx1C1 = xdf['x1'][xdf['y']==1].std()
Varx2C1 = xdf['x2'][xdf['y']==1].std()
Varx1C2 = xdf['x1'][xdf['y']==0].std()
Varx2C2 = xdf['x2'][xdf['y']==0].std()
# define the axes along which to evaluate the Normal densities
# (use the same axes as the plotting mesh so the resulting grid lines up with xx and yy)
rangex1 = np.linspace(x_min, x_max, nx)
rangex2 = np.linspace(y_min, y_max, ny)
# generate Gaussian distributions for x1 and x2 within both class1 and class2
C1x1 = np.array([stats.norm(Mux1C1, Varx1C1).pdf(i) for i in rangex1])
C1x2 = np.array([stats.norm(Mux2C1, Varx2C1).pdf(i) for i in rangex2])
C2x1 = np.array([stats.norm(Mux1C2, Varx1C2).pdf(i) for i in rangex1])
C2x2 = np.array([stats.norm(Mux2C2, Varx2C2).pdf(i) for i in rangex2])
# combine these into a joint likelihood surface for each class via the outer product of the
# per-variable densities (i.e. the joint p(x1, x2 | class), with x1 and x2 treated as independent)
pLikelihoodC1Joint = np.dot(C1x2[:, None], C1x1[None, :])
pLikelihoodC2Joint = np.dot(C2x2[:, None], C2x1[None, :])
# Finally, apply Bayes' rule to get a posterior score for each class: p(class | data).
# The denominator is the same for both classes, so it only rescales the scores and does
# not change which class wins the comparison below.
ScoreClass1GivenData = pLikelihoodC1Joint * pClass1 / (np.sum(pLikelihoodC1Joint * pClass1) + np.sum(pLikelihoodC2Joint * pClass2))
ScoreClass2GivenData = pLikelihoodC2Joint * pClass2 / (np.sum(pLikelihoodC1Joint * pClass1) + np.sum(pLikelihoodC2Joint * pClass2))
## at each grid point, compare the two posterior scores and allocate the point to the larger:
# i.e. predict class 1 wherever ScoreClass1GivenData > ScoreClass2GivenData
Z = ScoreClass1GivenData > ScoreClass2GivenData
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize,[5,1])
ax.title.set_text("Bayes: Full joint\n (Guassian) distribution")
plotboundary(ax, Z)
plotcases(ax)
### 14. Bayes with joint, nonparametric (kernel density estimated) distributions
## = Does NOT assume normality
# Specify prior: p(class). [same as above]
# Create likelihood distribution: p(datapoint | class)
# = estimate each class-conditional density with a kernel density estimate
# (dens_1 is fitted on class 1 (y==1) and dens_2 on class 2 (y==0), matching pClass1/pClass2 above)
dens_1 = sm.nonparametric.KDEMultivariate(data=xdf[['x1','x2']][xdf['y']==1],
                                           var_type='cc', bw='normal_reference')
dens_2 = sm.nonparametric.KDEMultivariate(data=xdf[['x1','x2']][xdf['y']==0],
                                           var_type='cc', bw='normal_reference')
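# var_type='cc' declares both features continuous; bw='normal_reference' uses the normal-reference
# rule of thumb for the bandwidths (cross-validated bandwidths are also available, e.g. bw='cv_ml').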
# evaluate each class-conditional density at every point on the plotting mesh
pLikelihoodC1Joint = dens_1.pdf(np.c_[xx.ravel(), yy.ravel()])
pLikelihoodC2Joint = dens_2.pdf(np.c_[xx.ravel(), yy.ravel()])
# Use Bayes' rule to get a posterior score for each class, then reshape back to the grid
ScoreClass1GivenData = pLikelihoodC1Joint * pClass1 / (np.sum(pLikelihoodC1Joint * pClass1) + np.sum(pLikelihoodC2Joint * pClass2))
ScoreClass2GivenData = pLikelihoodC2Joint * pClass2 / (np.sum(pLikelihoodC1Joint * pClass1) + np.sum(pLikelihoodC2Joint * pClass2))
ScoreClass1GivenData = ScoreClass1GivenData.reshape(xx.shape)
ScoreClass2GivenData = ScoreClass2GivenData.reshape(xx.shape)
# predict the most likely class at each point on the grid (True = class 1, as in the Gaussian case)
Z = ScoreClass1GivenData > ScoreClass2GivenData
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [5,2])
ax.title.set_text("Bayes: Full joint\n KDE distribution")
plotboundary(ax, Z)
plotcases(ax)
####### Neural Networks #########
## NB: these take a while to train & the neurolab library can be a bit temperamental
## Can delete this chunk of code if desired
yarr = y.reshape(-1, 1)  # neurolab expects a 2D target array
## Neural Net - 1 hidden layer, 4 hidden nodes
net4 = nl.net.newff([[np.min(X[:,0]), np.max(X[:,0])], [np.min(X[:,1]), np.max(X[:,1])]], [4, 1])
net4.trainf = nl.train.train_gd  # use gradient descent; bfgs is too buggy in neurolab
err = net4.train(X, yarr, show=100, goal=0.01)
Z = net4.sim(np.vstack([xx.ravel(), yy.ravel()]).T)
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize,[6,0])
ax.title.set_text("Neural Net\n 1HL x 4 nodes")
plotboundary(ax, Z)
plotcases(ax)
## Neural Net - 2 hidden layers, 4 hidden nodes each
net44 = nl.net.newff([[np.min( X[:,0]), np.max( X[:,0])], [np.min( X[:,1]), np.max( X[:,1])]], [4, 4, 1])
net44.trainf = nl.train.train_gd # use gradient descent, bfgs is too buggy in neurolab
err = net44.train(X, yarr, show=100, goal=0.01, lr=0.01)
Z = net44.sim(np.vstack([xx.ravel(), yy.ravel()]).T)
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize,[6,1])
ax.title.set_text("Neural Net\n 2HL x 4 nodes each")
plotboundary(ax, Z)
plotcases(ax)
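## If running as a plain script rather than with %pylab inline, uncomment to display or save the
## figure (the filename here is just an example):
# plt.show()
# fig.savefig('decision_boundaries.png', dpi=150, bbox_inches='tight')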