iandewancker/gist:e698014cdfdfa2ba562a479b75fc9766

## gistfile1.txt
%matplotlib
import matplotlib
matplotlib.rcParams.update({'font.size': 32})
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np; np.random.seed(10)
# Parameters of the three Gaussian clusters that make up the "true"
# distribution: one wide cluster sampled 4000 times and two narrow clusters
# sampled only 5 times each.
# NOTE(review): cov2 and cov3 have a negative determinant
# (0.01 * 0.01 - 0.02 * 0.02 < 0), so they are not positive semi-definite --
# confirm these are the intended values.
mean1 = [0, 2]
cov1 = [(0.5, .25), (.25, 0.5)]
mean2 = [2.5, -0.5]
cov2 = [(0.01, 0.02), (0.02, 0.01)]
mean3 = [3.2, 1.0]
cov3 = [(0.01, 0.02), (0.02, 0.01)]

# The draw order (cluster 1, then 2, then 3) is kept so that a seeded global
# generator produces the same samples as before.
x1, y1 = np.transpose(np.random.multivariate_normal(mean1, cov1, size=4000))
x2, y2 = np.transpose(np.random.multivariate_normal(mean2, cov2, size=5))
x3, y3 = np.transpose(np.random.multivariate_normal(mean3, cov3, size=5))

# Flat coordinate vectors holding all positive samples, cluster after cluster.
x = np.concatenate([x1, x2, x3])
y = np.concatenate([y1, y2, y3])
# Scatter plot of the positive ("true") samples.
plt.figure()
# Grab the figure's axes once: calling plt.axes() repeatedly can create a new
# Axes per call on current matplotlib, so the limits, aspect and adjustable
# settings would end up on different axes.
ax = plt.gca()
ax.set_xlim(-4.0, 4.0)
ax.set_ylim(-2.0, 6.0)
ax.set_aspect('equal')
ax.set_adjustable('datalim')
# x/y are passed by keyword (accepted by old and new seaborn alike; newer
# releases reject positional data vectors). fit_reg=False leaves only the
# scatter points.
sns.regplot(x=x, y=y, fit_reg=False, color='green', ax=ax)
ax.set_ylabel('Feature 1')
ax.set_xlabel('Feature 2')
ax.set_title('Samples from True Distribution')

# Negative class: 12000 points drawn uniformly over [-4, 4] x [-2, 6].
x_false = np.random.uniform(-4.0, 4.0, 12000)
y_false = np.random.uniform(-2.0, 6.0, 12000)

# Build the (n_samples, 2) design matrix: positives first, then negatives.
# np.column_stack pairs the coordinates directly; the previous zip() calls
# return lazy iterators on Python 3, which np.vstack cannot stack into rows.
X = np.vstack([np.column_stack([x, y]), np.column_stack([x_false, y_false])])
# Labels in the same order: 1 for true samples, 0 for uniform background.
y_train = np.hstack([np.ones(y.shape[0]), np.zeros(y_false.shape[0])])

# Fit a logistic-regression classifier on the labelled samples
# (X rows are 2-D points, y_train holds the 0/1 labels).
from sklearn import linear_model
clf_lr = linear_model.LogisticRegression()
clf_lr.fit(X, y_train)

# Evaluate the model on a grid with 0.01 spacing over [-4, 4) x [-2, 6).
x_m = np.arange(-4.0, 4.0, 0.01)
y_m = np.arange(-2.0, 6.0, 0.01)
xm1, ym1 = np.meshgrid(x_m, y_m)
# predict_proba needs an (n_points, 2) array; zip() is a lazy iterator on
# Python 3 and is not a valid input, so the grid is stacked with numpy.
grid_points = np.column_stack([xm1.ravel(), ym1.ravel()])
# Column 1 is the probability of the second class (label 1.0).
z_m = clf_lr.predict_proba(grid_points)[:, 1]
z_m = z_m.reshape(xm1.shape)

# Filled contour map of the predicted probability.
plt.figure()
plt.contourf(xm1, ym1, z_m, 50, cmap="BuGn")
plt.ylabel('Feature 1')
plt.xlabel('Feature 2')
plt.xlim((-4, 4))
plt.ylim((-2, 6))
plt.axis('equal')
plt.title('LR Model')

# Fit a random-forest classifier (10 trees, depth capped at 15) on the same
# labelled samples.
import sklearn.ensemble
clf_rf = sklearn.ensemble.RandomForestClassifier(n_estimators=10, max_depth=15)
clf_rf.fit(X, y_train)

# Evaluate the model on a grid with 0.01 spacing over [-4, 4) x [-2, 6).
x_m = np.arange(-4.0, 4.0, 0.01)
y_m = np.arange(-2.0, 6.0, 0.01)
xm1, ym1 = np.meshgrid(x_m, y_m)
# predict_proba needs an (n_points, 2) array; zip() is a lazy iterator on
# Python 3 and is not a valid input, so the grid is stacked with numpy.
grid_points = np.column_stack([xm1.ravel(), ym1.ravel()])
# Column 1 is the probability of the second class (label 1.0).
z_m = clf_rf.predict_proba(grid_points)[:, 1]
z_m = z_m.reshape(xm1.shape)

# Open a dedicated figure; without it the contours and title below are drawn
# on top of whatever figure is current (the previous model's plot).
plt.figure()
plt.contourf(xm1, ym1, z_m, 50, cmap="BuGn")
plt.ylabel('Feature 1')
plt.xlabel('Feature 2')
plt.xlim((-4, 4))
plt.ylim((-2, 6))
plt.axis('equal')
plt.title('RF Model')
plt.show()
	%matplotlib
	import matplotlib
	matplotlib.rcParams.update({'font.size': 32})
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import numpy as np; np.random.seed(10)
	# Parameters of the three Gaussian clusters that make up the "true"
	# distribution: one wide cluster sampled 4000 times and two narrow clusters
	# sampled only 5 times each.
	# NOTE(review): cov2 and cov3 have a negative determinant
	# (0.01 * 0.01 - 0.02 * 0.02 < 0), so they are not positive semi-definite --
	# confirm these are the intended values.
	mean1 = [0, 2]
	cov1 = [(0.5, .25), (.25, 0.5)]
	mean2 = [2.5, -0.5]
	cov2 = [(0.01, 0.02), (0.02, 0.01)]
	mean3 = [3.2, 1.0]
	cov3 = [(0.01, 0.02), (0.02, 0.01)]

	# The draw order (cluster 1, then 2, then 3) is kept so that a seeded global
	# generator produces the same samples as before.
	x1, y1 = np.transpose(np.random.multivariate_normal(mean1, cov1, size=4000))
	x2, y2 = np.transpose(np.random.multivariate_normal(mean2, cov2, size=5))
	x3, y3 = np.transpose(np.random.multivariate_normal(mean3, cov3, size=5))

	# Flat coordinate vectors holding all positive samples, cluster after cluster.
	x = np.concatenate([x1, x2, x3])
	y = np.concatenate([y1, y2, y3])
	# Scatter plot of the positive ("true") samples.
	plt.figure()
	# Grab the figure's axes once: calling plt.axes() repeatedly can create a new
	# Axes per call on current matplotlib, so the limits, aspect and adjustable
	# settings would end up on different axes.
	ax = plt.gca()
	ax.set_xlim(-4.0, 4.0)
	ax.set_ylim(-2.0, 6.0)
	ax.set_aspect('equal')
	ax.set_adjustable('datalim')
	# x/y are passed by keyword (accepted by old and new seaborn alike; newer
	# releases reject positional data vectors). fit_reg=False leaves only the
	# scatter points.
	sns.regplot(x=x, y=y, fit_reg=False, color='green', ax=ax)
	ax.set_ylabel('Feature 1')
	ax.set_xlabel('Feature 2')
	ax.set_title('Samples from True Distribution')

	# Negative class: 12000 points drawn uniformly over [-4, 4] x [-2, 6].
	x_false = np.random.uniform(-4.0, 4.0, 12000)
	y_false = np.random.uniform(-2.0, 6.0, 12000)

	# Build the (n_samples, 2) design matrix: positives first, then negatives.
	# np.column_stack pairs the coordinates directly; the previous zip() calls
	# return lazy iterators on Python 3, which np.vstack cannot stack into rows.
	X = np.vstack([np.column_stack([x, y]), np.column_stack([x_false, y_false])])
	# Labels in the same order: 1 for true samples, 0 for uniform background.
	y_train = np.hstack([np.ones(y.shape[0]), np.zeros(y_false.shape[0])])

	# Fit a logistic-regression classifier on the labelled samples
	# (X rows are 2-D points, y_train holds the 0/1 labels).
	from sklearn import linear_model
	clf_lr = linear_model.LogisticRegression()
	clf_lr.fit(X, y_train)

	# Evaluate the model on a grid with 0.01 spacing over [-4, 4) x [-2, 6).
	x_m = np.arange(-4.0, 4.0, 0.01)
	y_m = np.arange(-2.0, 6.0, 0.01)
	xm1, ym1 = np.meshgrid(x_m, y_m)
	# predict_proba needs an (n_points, 2) array; zip() is a lazy iterator on
	# Python 3 and is not a valid input, so the grid is stacked with numpy.
	grid_points = np.column_stack([xm1.ravel(), ym1.ravel()])
	# Column 1 is the probability of the second class (label 1.0).
	z_m = clf_lr.predict_proba(grid_points)[:, 1]
	z_m = z_m.reshape(xm1.shape)

	# Filled contour map of the predicted probability.
	plt.figure()
	plt.contourf(xm1, ym1, z_m, 50, cmap="BuGn")
	plt.ylabel('Feature 1')
	plt.xlabel('Feature 2')
	plt.xlim((-4, 4))
	plt.ylim((-2, 6))
	plt.axis('equal')
	plt.title('LR Model')

	# Fit a random-forest classifier (10 trees, depth capped at 15) on the same
	# labelled samples.
	import sklearn.ensemble
	clf_rf = sklearn.ensemble.RandomForestClassifier(n_estimators=10, max_depth=15)
	clf_rf.fit(X, y_train)

	# Evaluate the model on a grid with 0.01 spacing over [-4, 4) x [-2, 6).
	x_m = np.arange(-4.0, 4.0, 0.01)
	y_m = np.arange(-2.0, 6.0, 0.01)
	xm1, ym1 = np.meshgrid(x_m, y_m)
	# predict_proba needs an (n_points, 2) array; zip() is a lazy iterator on
	# Python 3 and is not a valid input, so the grid is stacked with numpy.
	grid_points = np.column_stack([xm1.ravel(), ym1.ravel()])
	# Column 1 is the probability of the second class (label 1.0).
	z_m = clf_rf.predict_proba(grid_points)[:, 1]
	z_m = z_m.reshape(xm1.shape)

	# Open a dedicated figure; without it the contours and title below are drawn
	# on top of whatever figure is current (the previous model's plot).
	plt.figure()
	plt.contourf(xm1, ym1, z_m, 50, cmap="BuGn")
	plt.ylabel('Feature 1')
	plt.xlabel('Feature 2')
	plt.xlim((-4, 4))
	plt.ylim((-2, 6))
	plt.axis('equal')
	plt.title('RF Model')
	plt.show()