Skip to content

Instantly share code, notes, and snippets.

@amueller
Created May 17, 2017 15:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amueller/07a57361cf0fc4bebeee00429bb4d351 to your computer and use it in GitHub Desktop.
Save amueller/07a57361cf0fc4bebeee00429bb4d351 to your computer and use it in GitHub Desktop.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
class Curve(object):
    """Map raw scores onto letter grades using mean/std based cut points.

    Grade boundaries are spaced a third of a standard deviation apart and are
    anchored so that the lower edge of the ``to`` grade sits exactly on the
    mean of the "inlier" scores (scores strictly greater than 20).  The two
    lowest grades are handled separately: ``F`` covers everything below the
    lowest inlier and ``D`` starts just below the lowest inlier.

    Parameters
    ----------
    scores : array-like (numpy array or pandas Series)
        Raw scores to curve.  Must support boolean-mask indexing and
        ``.min()`` / ``.max()``.
    to : str, default "B+"
        Letter grade whose lower boundary is placed at the inlier mean.
    std_adjust : float, default 0
        Value added to the inlier standard deviation before spacing the
        boundaries; widens (positive) or narrows (negative) every bin.

    Attributes
    ----------
    letters : list of str
        Grade labels, best first.
    cuts : numpy.ndarray
        Bin edges in ascending order; ``len(cuts) == len(letters) + 1``.
    std : float
        Adjusted standard deviation of the inliers.
    inliers : array-like
        The subset of ``scores`` strictly greater than 20.

    Raises
    ------
    ValueError
        If ``to`` is not a known letter or no score is greater than 20.
    """

    def __init__(self, scores, to="B+", std_adjust=0):
        self.to = to
        self.scores = scores
        self.letters = ["A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D", "F"]
        idx = self.letters.index(to)

        # One edge per third of a standard deviation.  The "+ 3" accounts for
        # the three edges that are added by hand below (bottom of F, bottom
        # of D and the top of the highest grade).
        raw_cuts = (np.arange(-len(self.letters) + 3, 1) + idx) / 3.

        # Inliers are those that should pass; only they define mean and std.
        inliers = scores[scores > 20]
        if len(inliers) == 0:
            raise ValueError("cannot curve: no score is greater than 20")
        mean = np.mean(inliers)
        std = np.std(inliers) + std_adjust
        cuts_org = mean + raw_cuts * std

        # Add the top bin, D and F - make sure the lowest inlier gets D, not F.
        cuts = np.hstack([
            [scores.min() - .01, inliers.min() - .01],
            cuts_org,
            [scores.max()],
        ])
        # Move the lower edge of C- to the midpoint between the D edge and
        # the C edge.
        cuts[2] = (cuts[1] + cuts[3]) / 2

        self.std = std
        self.cuts = cuts
        self.inliers = inliers

    def grade(self, scores=None):
        """Return the letter grade for each score.

        Parameters
        ----------
        scores : array-like, optional
            Scores to grade with the fitted cut points.  Defaults to the
            scores the curve was built from.

        Returns
        -------
        The result of ``pandas.cut`` on the scores: one letter per score.
        Scores outside ``(cuts[0], cuts[-1]]`` come back as missing values.
        """
        if scores is None:
            scores = self.scores
        return pd.cut(scores, self.cuts, labels=self.letters[::-1])

    def plot_hist(self):
        """Plot a histogram of the scores with bins aligned to grade edges."""
        bins = np.arange(0, self.scores.max(), self.std / 3.)
        distances = np.abs(bins.reshape(-1, 1) - self.cuts)
        # Snap a bin edge onto the nearest grade boundary if already close.
        assign = np.min(distances, axis=1) < self.std / 6.
        inds = np.argmin(distances, axis=1)
        bins[assign] = self.cuts[inds][assign]
        # Wrapping in a Series lets plain numpy arrays be plotted as well.
        pd.Series(self.scores).hist(bins=bins)
        # Label each bin at its lower edge: one tick per letter.  The topmost
        # edge is dropped so tick and label counts match.
        plt.xticks(self.cuts[:-1], self.letters[::-1])

    def plot_bar(self):
        """Plot a bar chart of how many scores fall into each letter grade."""
        grades = pd.Series(self.grade())
        grades.value_counts(sort=False).plot(kind="bar")

    def plot_curve(self, scores=None, include_outliers=False):
        """Plot scores sorted in descending order over the grade boundaries.

        Parameters
        ----------
        scores : array-like, optional
            Scores to plot.  When omitted, the stored scores are used: all of
            them if ``include_outliers`` is true, otherwise only the inliers.
        include_outliers : bool, default False
            Whether to draw the lowest boundary (bottom of ``F``) as well.
        """
        lines = self.cuts if include_outliers else self.cuts[1:]
        # NOTE: the horizontal extent of the boundary lines is fixed at 0-100.
        plt.hlines(lines, 0, 100, linewidth=.1)

        if scores is not None:
            shown = scores
        elif include_outliers:
            shown = self.scores
        else:
            shown = self.inliers
        plt.plot(np.sort(shown)[::-1], 'o', markersize=1)

        ordered = self.letters[::-1]
        letters = ordered if include_outliers else ordered[1:]
        # Every letter is labelled at the lower edge of its bin; the topmost
        # edge has no letter, so it is left out of the tick positions.
        ticks = lines[:-1]
        plt.yticks(ticks, ["{}: {:.0f}".format(l, c) for c, l in zip(ticks, letters)])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment