erikbern/gist:4cff437097067142eca7

## gistfile1.py
import numpy as np
import matplotlib.pyplot as plt

# Simulate a population of candidates described by three correlated traits.
# Column 0 is plotted below as 'What really matters' and column 1 as
# 'Programming competition skills'; column 2 is never plotted and only
# enters through the hiring criterion.
mean = np.array([1, 1, 1])
cov = np.array([[1, 0.5, 0.5], [0.5, 1, 0.5], [0.5, 0.5, 0.5]])

# No seed is set, so every run draws a different population.
people = np.random.multivariate_normal(mean, cov, 100000)

# The hiring score gives column 0 a weight of zero, column 1 a weight of 0.2
# and column 2 a weight of 1.0.
criterion = np.array([0, 0.2, 1.0])
scores = np.dot(people, criterion)
pc99 = np.percentile(scores, 99)

# Split the population with a single boolean mask over the scores already
# computed above, instead of recomputing one dot product per person in a
# Python loop.  Using the mask and its complement guarantees that every
# person lands in exactly one group (a score equal to the threshold counts
# as not hired), and both results stay 2-D even if a group is empty.
hired_mask = scores > pc99
hired_people = people[hired_mask]
fired_people = people[~hired_mask]

# Correlation between column 0 and column 1 among the hired group only.
cf = np.corrcoef(hired_people[:, 0], hired_people[:, 1])[0, 1]

# Labels are attached to each scatter call so the legend cannot fall out of
# sync with the plotting order.
plt.scatter(fired_people[:, 0], fired_people[:, 1], marker='x', c='blue',
            label='not hired people')
plt.scatter(hired_people[:, 0], hired_people[:, 1], marker='x', c='red',
            label='hired people')
plt.ylabel('Programming competition skills')
plt.xlabel('What really matters')
plt.title('Correlation for hired people: %.4f' % cf)
plt.legend(loc='lower right')
plt.savefig('recruiting_realistic.png')
	import numpy as np
	import matplotlib.pyplot as plt

	# Simulate a population of candidates described by three correlated traits.
	# Column 0 is plotted below as 'What really matters' and column 1 as
	# 'Programming competition skills'; column 2 is never plotted and only
	# enters through the hiring criterion.
	mean = np.array([1, 1, 1])
	cov = np.array([[1, 0.5, 0.5], [0.5, 1, 0.5], [0.5, 0.5, 0.5]])

	# No seed is set, so every run draws a different population.
	people = np.random.multivariate_normal(mean, cov, 100000)

	# The hiring score gives column 0 a weight of zero, column 1 a weight of 0.2
	# and column 2 a weight of 1.0.
	criterion = np.array([0, 0.2, 1.0])
	scores = np.dot(people, criterion)
	pc99 = np.percentile(scores, 99)

	# Split the population with a single boolean mask over the scores already
	# computed above, instead of recomputing one dot product per person in a
	# Python loop.  Using the mask and its complement guarantees that every
	# person lands in exactly one group (a score equal to the threshold counts
	# as not hired), and both results stay 2-D even if a group is empty.
	hired_mask = scores > pc99
	hired_people = people[hired_mask]
	fired_people = people[~hired_mask]

	# Correlation between column 0 and column 1 among the hired group only.
	cf = np.corrcoef(hired_people[:, 0], hired_people[:, 1])[0, 1]

	# Labels are attached to each scatter call so the legend cannot fall out of
	# sync with the plotting order.
	plt.scatter(fired_people[:, 0], fired_people[:, 1], marker='x', c='blue',
	            label='not hired people')
	plt.scatter(hired_people[:, 0], hired_people[:, 1], marker='x', c='red',
	            label='hired people')
	plt.ylabel('Programming competition skills')
	plt.xlabel('What really matters')
	plt.title('Correlation for hired people: %.4f' % cf)
	plt.legend(loc='lower right')
	plt.savefig('recruiting_realistic.png')