Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
import numpy as np
import matplotlib.pyplot as plt
# Simulation: how selecting the top 1% by a composite score changes the
# correlation between two traits inside the selected ("hired") group.
#
# Each row of `people` is one simulated person with three traits. Column 0 is
# plotted as 'What really matters' and column 1 as 'Programming competition
# skills'; column 2 is never plotted and only enters through the hiring score.
mean = np.array([1, 1, 1])
cov = np.array([[1, 0.5, 0.5], [0.5, 1, 0.5], [0.5, 0.5, 0.5]])
people = np.random.multivariate_normal(mean, cov, 100000)

# Hiring score weights: column 0 has weight 0, column 1 weight 0.2 and
# column 2 weight 1.0.
criterion = np.array([0, 0.2, 1.0])
scores = np.dot(people, criterion)  # one score per person (row)
pc99 = np.percentile(scores, 99)

# Select rows with boolean masks over the scores computed above instead of
# recomputing a dot product per person in a Python loop. The comparisons stay
# strict on both sides, so a score exactly equal to pc99 lands in neither group.
hired_people = people[scores > pc99]
fired_people = people[scores < pc99]

# Correlation between column 0 and column 1 among hired people only.
cf = np.corrcoef(hired_people[:, 0], hired_people[:, 1])[0, 1]

# Draw the hired group last so its red markers sit on top of the blue ones.
plt.scatter(fired_people[:, 0], fired_people[:, 1], marker='x', c='blue')
plt.scatter(hired_people[:, 0], hired_people[:, 1], marker='x', c='red')
plt.ylabel('Programming competition skills')
plt.xlabel('What really matters')
plt.title('Correlation for hired people: %.4f' % cf)
plt.legend(['not hired people', 'hired people'], loc='lower right')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment