20160215 Decision Boundaries
### Prepare Python
%pylab inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
import statsmodels.api as sm
import scipy.stats as stats
import neurolab as nl
from sklearn.datasets import make_classification
from sklearn import neighbors, tree, svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
### Helper functions for plotting
# A function to plot observations on scatterplot
def plotcases(ax):
    ax.scatter(xdf['x1'], xdf['x2'], c=xdf['y'], cmap=cm.coolwarm, alpha=0.6, s=20, lw=0.4)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.tick_params(axis='both', which='both', left=False, right=False, top=False, bottom=False,
                   labelleft=False, labelright=False, labeltop=False, labelbottom=False)
# a function to draw decision boundary and colour the regions
def plotboundary(ax, Z):
    ax.pcolormesh(xx, yy, Z, cmap=cm.coolwarm, alpha=0.1)
    ax.contour(xx, yy, Z, [0.5], linewidths=0.75, colors='k')
### Generate train data
X, y = make_classification(n_samples=100, n_features=2, n_informative=2, n_redundant=0, n_repeated=0,
                           n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.02, class_sep=0.5,
                           hypercube=True, shift=0.0, scale=0.5, shuffle=True, random_state=5)
xdf = pd.DataFrame(X, columns=['x1','x2'])
xdf['y'] = y
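# NB: with class_sep=0.5, flip_y=0.02 and two clusters per class, the classes overlap and ~2% of
# labels are flipped at random, so no decision boundary can classify the training points perfectly.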
### create plot canvas with a 7x4 grid of subplots
fig = plt.figure(figsize=(12,16), dpi=1600)
plt.subplots_adjust(hspace=.5)
nrows = 7
ncols = 4
gridsize = (nrows, ncols)
### 1. plot the problem ###
ax0 = plt.subplot2grid(gridsize,[0,0])
plotcases(ax0)
ax0.title.set_text("Problem")
# take boundaries from first plot and define mesh of points for plotting decision spaces
x_min, x_max = plt.xlim()
y_min, y_max = plt.ylim()
nx, ny = 100, 100 # number of grid points along each axis of the mesh
xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx),
np.linspace(y_min, y_max, ny))
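# Each classifier below is scored over this mesh; its class-1 probability (or predicted class) is
# reshaped back to the grid so plotboundary can shade the regions and draw the p=0.5 contour.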
############# kNN #################
### 2. kNN with k=5
knn5 = neighbors.KNeighborsClassifier(n_neighbors=5)
knn5.fit(xdf[['x1','x2']], xdf['y'])
Z = knn5.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [1,0])
ax.title.set_text("kNN, k=5")
plotboundary(ax, Z)
plotcases(ax)
### 3. kNN with k=15
knn15 = neighbors.KNeighborsClassifier(n_neighbors=15)
knn15.fit(xdf[['x1','x2']], xdf['y'])
Z = knn15.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize,[1,1])
ax.title.set_text("kNN, k=15")
plotboundary(ax, Z)
plotcases(ax)
########### logistic regression ##########
### 4. Logistic regression - simple linear
formula = ('y ~ x1 + x2')
LRm = sm.GLM.from_formula(formula=formula, data=xdf, family=sm.families.Binomial()).fit()
XX = pd.DataFrame(np.vstack([xx.ravel(), yy.ravel()]).T)
XX.columns = xdf.columns[0:2]
Z = LRm.predict(XX)
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [2,0])
ax.title.set_text("Logistic Regression\n simple")
plotboundary(ax, Z)
plotcases(ax)
### 5. Logistic regression - with basic polynomials
formula = ('y ~ x1 + x2 + I(x1**2) + I(x2**2)')
LRm = sm.GLM.from_formula(formula=formula, data=xdf, family=sm.families.Binomial()).fit()
Z = LRm.predict(XX)
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [2,1])
ax.title.set_text("Logistic Regression\n basic polynomials")
plotboundary(ax, Z)
plotcases(ax)
### 6. Logistic regression - with polynomials & interactions
formula = ('y ~ x1 + x2 + x1*x2 + I(x1**2) + I(x2**2) + x1*I(x1**2) + x2*I(x2**2)\
+ I(x1**3) + I(x2**3) + x1*I(x1**3) + x2*I(x2**3)')
LRm = sm.GLM.from_formula(formula=formula, data=xdf, family=sm.families.Binomial()).fit()
Z = LRm.predict(XX)
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [2,2])
ax.title.set_text("Logistic Regression\n polynomials & interactions")
plotboundary(ax, Z)
plotcases(ax)
### 7. Logistic regression - with polynomials & interactions, regularised
# alpha is the penalty weight in statsmodels' fit_regularized: smaller alpha = weaker regularisation.
formula = ('y ~ x1 + x2 + x1*x2 + I(x1**2) + I(x2**2) + x1*I(x1**2) + x2*I(x2**2)\
+ I(x1**3) + I(x2**3) + x1*I(x1**3) + x2*I(x2**3)')
LRm = sm.Logit.from_formula(formula=formula, data=xdf).fit_regularized(alpha=0.5, maxiter = 200)
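# (The penalty shrinks many of the polynomial/interaction coefficients towards zero;
#  inspect LRm.params to see which terms survive.)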
Z = LRm.predict(XX)
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [2,3])
ax.title.set_text("Logistic Regression\n polynomials & interactions\n regularised")
plotboundary(ax, Z)
plotcases(ax)
##### TREE METHODS ########
### 8. Decision tree
dtree = tree.DecisionTreeClassifier()
dtree = dtree.fit(xdf[['x1','x2']], xdf['y'])
Z = dtree.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [3,0])
ax.title.set_text("Decision tree")
plotboundary(ax, Z)
plotcases(ax)
### 9. Random forest
rf = RandomForestClassifier()
rf.fit(xdf[['x1','x2']], xdf['y'])
Z = rf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [3,1])
ax.title.set_text("Random forest")
plotboundary(ax, Z)
plotcases(ax)
###### SUPPORT VECTOR MACHINES ###############
## 10. SVM with 4th order polynomials
svc2 = svm.SVC(kernel='poly',degree=4, probability=True)
svc2.fit(xdf[['x1','x2']], xdf['y'])
Z = svc2.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [4,0])
ax.title.set_text("SVM\n4th order polynomials")
plotboundary(ax, Z)
plotcases(ax)
## 11. SVM with radial basis function
svc3 = svm.SVC(kernel='rbf', probability=True)
svc3.fit(xdf[['x1','x2']], xdf['y'])
Z = svc3.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [4,1])
ax.title.set_text("SVM\n Radial basis function")
plotboundary(ax, Z)
plotcases(ax)
####### Bayesian & Probabilistic methods ############
### 12. Gaussian Naive Bayes
gnb = GaussianNB()
gnb.fit(xdf[['x1','x2']], xdf['y'])
Z = gnb.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [5,0])
ax.title.set_text("Gaussian Naive Bayes")
plotboundary(ax, Z)
plotcases(ax)
### 13. Gaussian Bayes, non-naive (joint distribution)
## Assumes multivariate normality
# Specify prior: p(class), taken as the empirical class proportions (the same prior applies everywhere in feature space)
pClass1 = len(xdf[xdf['y']==1]) / float(len(xdf))
pClass2 = len(xdf[xdf['y']==0]) / float(len(xdf))
## Create likelihood distribution: p(datapoint | class)
## For each class, estimate the mean and standard deviation of both variables
# Collect means for both variables, for both classes
Mux1C1 = xdf['x1'][xdf['y']==1].mean()
Mux2C1 = xdf['x2'][xdf['y']==1].mean()
Mux1C2 = xdf['x1'][xdf['y']==0].mean()
Mux2C2 = xdf['x2'][xdf['y']==0].mean()
# Collect standard deviations for both variables, for both classes
# (stats.norm takes the standard deviation as its scale parameter, so .std() is what we want here)
Varx1C1 = xdf['x1'][xdf['y']==1].std()
Varx2C1 = xdf['x2'][xdf['y']==1].std()
Varx1C2 = xdf['x1'][xdf['y']==0].std()
Varx2C2 = xdf['x2'][xdf['y']==0].std()
# define the axes along which to evaluate the Normal densities
# (use the same axes as the plotting mesh so the resulting grid lines up with xx and yy)
rangex1 = np.linspace(x_min, x_max, nx)
rangex2 = np.linspace(y_min, y_max, ny)
# generate Gaussian distributions for x1 and x2 within both class1 and class2
C1x1 = np.array([stats.norm(Mux1C1, Varx1C1).pdf(i) for i in rangex1])
C1x2 = np.array([stats.norm(Mux2C1, Varx2C1).pdf(i) for i in rangex2])
C2x1 = np.array([stats.norm(Mux1C2, Varx1C2).pdf(i) for i in rangex1])
C2x2 = np.array([stats.norm(Mux2C2, Varx2C2).pdf(i) for i in rangex2])
# combine these into a joint likelihood surface for each class via the outer product of the
# per-variable densities (i.e. the joint p(x1, x2 | class), with x1 and x2 treated as independent)
pLikelihoodC1Joint = np.dot(C1x2[:, None], C1x1[None, :])
pLikelihoodC2Joint = np.dot(C2x2[:, None], C2x1[None, :])
# Finally, apply Bayes' rule to get a posterior score for each class: p(class | data).
# The denominator is the same for both classes, so it only rescales the scores and does
# not change which class wins the comparison below.
ScoreClass1GivenData = pLikelihoodC1Joint * pClass1 / (np.sum(pLikelihoodC1Joint * pClass1) + np.sum(pLikelihoodC2Joint * pClass2))
ScoreClass2GivenData = pLikelihoodC2Joint * pClass2 / (np.sum(pLikelihoodC1Joint * pClass1) + np.sum(pLikelihoodC2Joint * pClass2))
## at each grid point, compare the two posterior scores and allocate the point to the larger:
# i.e. predict class 1 wherever ScoreClass1GivenData > ScoreClass2GivenData
Z = ScoreClass1GivenData > ScoreClass2GivenData
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize,[5,1])
ax.title.set_text("Bayes: Full joint\n (Guassian) distribution")
plotboundary(ax, Z)
plotcases(ax)
### 14. Bayes with joint, nonparametric (kernel density estimated) distributions
## = Does NOT assume normality
# Specify prior: p(class). [same as above]
# Create likelihood distribution: p(datapoint | class)
# = estimate each class-conditional density with a kernel density estimate
# (dens_1 is fitted on class 1 (y==1) and dens_2 on class 2 (y==0), matching pClass1/pClass2 above)
dens_1 = sm.nonparametric.KDEMultivariate(data=xdf[['x1','x2']][xdf['y']==1],
                                           var_type='cc', bw='normal_reference')
dens_2 = sm.nonparametric.KDEMultivariate(data=xdf[['x1','x2']][xdf['y']==0],
                                           var_type='cc', bw='normal_reference')
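# var_type='cc' declares both features continuous; bw='normal_reference' uses the normal-reference
# rule of thumb for the bandwidths (cross-validated bandwidths are also available, e.g. bw='cv_ml').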
# evaluate each class-conditional density at every point on the plotting mesh
pLikelihoodC1Joint = dens_1.pdf(np.c_[xx.ravel(), yy.ravel()])
pLikelihoodC2Joint = dens_2.pdf(np.c_[xx.ravel(), yy.ravel()])
# Use Bayes' rule to get a posterior score for each class, then reshape back to the grid
ScoreClass1GivenData = pLikelihoodC1Joint * pClass1 / (np.sum(pLikelihoodC1Joint * pClass1) + np.sum(pLikelihoodC2Joint * pClass2))
ScoreClass2GivenData = pLikelihoodC2Joint * pClass2 / (np.sum(pLikelihoodC1Joint * pClass1) + np.sum(pLikelihoodC2Joint * pClass2))
ScoreClass1GivenData = ScoreClass1GivenData.reshape(xx.shape)
ScoreClass2GivenData = ScoreClass2GivenData.reshape(xx.shape)
# predict the most likely class at each point on the grid (True = class 1, as in the Gaussian case)
Z = ScoreClass1GivenData > ScoreClass2GivenData
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize, [5,2])
ax.title.set_text("Bayes: Full joint\n KDE distribution")
plotboundary(ax, Z)
plotcases(ax)
####### Neural Networks #########
## NB: these take a while to train & the neurolab library can be a bit temperamental
## Can delete this chunk of code if desired
yarr = y.reshape(-1, 1)  # neurolab expects a 2D target array
## Neural Net - 1 hidden layer, 4 hidden nodes
net4 = nl.net.newff([[np.min(X[:,0]), np.max(X[:,0])], [np.min(X[:,1]), np.max(X[:,1])]], [4, 1])
net4.trainf = nl.train.train_gd  # use gradient descent; bfgs is too buggy in neurolab
err = net4.train(X, yarr, show=100, goal=0.01)
Z = net4.sim(np.vstack([xx.ravel(), yy.ravel()]).T)
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize,[6,0])
ax.title.set_text("Neural Net\n 1HL x 4 nodes")
plotboundary(ax, Z)
plotcases(ax)
## Neural Net - 2 hidden layers, 4 hidden nodes each
net44 = nl.net.newff([[np.min( X[:,0]), np.max( X[:,0])], [np.min( X[:,1]), np.max( X[:,1])]], [4, 4, 1])
net44.trainf = nl.train.train_gd # use gradient descent, bfgs is too buggy in neurolab
err = net44.train(X, yarr, show=100, goal=0.01, lr=0.01)
Z = net44.sim(np.vstack([xx.ravel(), yy.ravel()]).T)
Z = Z.reshape(xx.shape)
ax = plt.subplot2grid(gridsize,[6,1])
ax.title.set_text("Neural Net\n 2HL x 4 nodes each")
plotboundary(ax, Z)
plotcases(ax)
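## If running as a plain script rather than with %pylab inline, uncomment to display or save the
## figure (the filename here is just an example):
# plt.show()
# fig.savefig('decision_boundaries.png', dpi=150, bbox_inches='tight')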