|
#!/usr/bin/env python3 |
|
import bisect |
|
import matplotlib |
|
matplotlib.use('agg') |
|
|
|
import numpy as np |
|
from matplotlib import pyplot as plt |
|
|
|
|
|
def generate_bins(pos, neg, step):
    """Return equally spaced bin edges covering both samples.

    Args:
        pos: Array-like of values for the first sample.
        neg: Array-like of values for the second sample.
        step: Width of every bin; must be positive.

    Returns:
        A 1-D array of bin edges. The first edge is the smallest value found
        in either sample and the last edge is greater than or equal to the
        largest value, so there is always at least one bin.

    Raises:
        ValueError: If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError('step must be positive')

    lower = min(np.min(pos), np.min(neg))
    upper = max(np.max(pos), np.max(neg))

    # The edges must reach `upper`: np.histogram does not count samples that
    # lie beyond the outermost edge, so stopping short of the maximum would
    # silently drop the largest values. Always keep at least one bin so that
    # a degenerate range (lower == upper) still produces a usable histogram.
    n_bins = max(int(np.ceil((upper - lower) / step)), 1)
    edges = lower + step * np.arange(n_bins + 1)

    # Guard against floating-point rounding leaving the last edge a hair
    # below the maximum.
    if edges[-1] < upper:
        edges = np.append(edges, edges[-1] + step)

    return edges
|
|
|
|
|
def estimate_probability_densities(values, bins):
    """Estimate a probability density for ``values`` with a histogram.

    Args:
        values: Array-like of samples.
        bins: Bin specification forwarded unchanged to ``np.histogram``.

    Returns:
        A ``(bin_centers, densities)`` tuple with one entry per bin:
        the midpoint of each bin and the density-normalized histogram
        value for that bin.
    """
    densities, edges = np.histogram(values, bins=bins, density=True)
    left_edges = edges[:-1]
    right_edges = edges[1:]
    # Midpoint of each bin, used as the x coordinate of its density value.
    midpoints = (left_edges + right_edges) / 2
    return midpoints, densities
|
|
|
|
|
def estimate_cumulative_probabilities(values, bins):
    """Estimate the cumulative distribution of ``values`` with a histogram.

    Args:
        values: Array-like of samples.
        bins: Bin specification forwarded unchanged to ``np.histogram``.

    Returns:
        A ``(bin_centers, cumulative)`` tuple with one entry per bin.
        ``cumulative[i]`` is the estimated probability mass accumulated over
        bins ``0..i`` (i.e. up to the right edge of bin ``i``), reported at
        the midpoint of bin ``i``.
    """
    # np.histogram is called directly (rather than through
    # estimate_probability_densities) because the bin edges are needed to
    # obtain each bin's own width.
    densities, bin_edges = np.histogram(values, bins=bins, density=True)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

    # The values produced by np.histogram are pointwise probability density
    # estimates. Multiplying each by its bin width turns it into the
    # probability mass of that bin; summing the masses integrates the
    # density. Using the per-bin width keeps this correct for uneven bins
    # and for a histogram with a single bin.
    bin_masses = densities * np.diff(bin_edges)
    cumulative_dist = np.cumsum(bin_masses)

    return bin_centers, cumulative_dist
|
|
|
|
|
def plot_densities(ax, pos, neg, bins, candidate_threshold):
    """Draw the estimated density curves of both samples on ``ax``.

    Args:
        ax: Axes-like object the curves are drawn on.
        pos: Samples drawn in green and labelled 'pos'.
        neg: Samples drawn in red and labelled 'neg'.
        bins: Bin specification passed to estimate_probability_densities.
        candidate_threshold: x position of the vertical marker line.
    """
    pos_curve = estimate_probability_densities(pos, bins)
    neg_curve = estimate_probability_densities(neg, bins)

    styled_curves = (
        (pos_curve, 'g', 'pos'),
        (neg_curve, 'r', 'neg'),
    )
    for (xs, ys), colour, name in styled_curves:
        ax.plot(xs, ys, color=colour, label=name)

    # The threshold marker spans from zero up to the tallest density value.
    tallest = max(np.max(pos_curve[1]), np.max(neg_curve[1]))
    ax.vlines(candidate_threshold, 0, tallest,
              color='b', alpha=.5, label='candidate')

    ax.grid(True)
    ax.set_ylabel('probability density')
    ax.legend(loc='upper right', fontsize=10)
|
|
|
|
|
def plot_cumulatives(ax, pos, neg, bins, candidate_threshold):
    """Draw the estimated cumulative distributions of both samples on ``ax``.

    Args:
        ax: Axes-like object the curves are drawn on.
        pos: Samples drawn in green and labelled 'pos'.
        neg: Samples drawn in red and labelled 'neg'.
        bins: Bin specification passed to estimate_cumulative_probabilities.
        candidate_threshold: x position of the vertical marker line.
    """
    styled_samples = (
        (pos, 'g', 'pos'),
        (neg, 'r', 'neg'),
    )
    for sample, colour, name in styled_samples:
        xs, cumulative = estimate_cumulative_probabilities(sample, bins)
        ax.plot(xs, cumulative, color=colour, label=name)

    # Cumulative probabilities live in [0, 1], so the marker spans that range.
    ax.vlines(candidate_threshold, 0, 1, color='b', alpha=.5, label='candidate')

    ax.grid(True)
    ax.set_ylabel('cumulative probability')
    ax.legend(loc='lower right', fontsize=10)
|
|
|
|
|
def plot_roc(ax, pos, neg, bins, candidate_threshold):
    """Draw an ROC curve for the two samples and mark the candidate threshold.

    The cumulative distribution of ``neg`` is used as the true-negative rate
    and that of ``pos`` as the false-negative rate, i.e. samples below a
    threshold count as predicted negative.

    Args:
        ax: Axes-like object the curve is drawn on.
        pos: Samples of the positive class.
        neg: Samples of the negative class.
        bins: Bin edges passed to estimate_cumulative_probabilities; assumed
            to be a sorted sequence of edges (as generate_bins produces),
            since it is also searched with ``bisect``.
        candidate_threshold: Threshold whose operating point is highlighted.
    """
    _, tnr = estimate_cumulative_probabilities(neg, bins)
    _, fnr = estimate_cumulative_probabilities(pos, bins)

    fpr = 1 - tnr
    tpr = 1 - fnr

    ax.plot(fpr, tpr, color='k', label='ROC curve')
    # Thin diagonal (TPR == FPR) as a visual reference line.
    ax.plot(fpr, fpr, color='k', lw=.2)

    # The rate arrays hold one entry per bin, and entry i accumulates
    # everything up to edge i + 1. bisect_left returns the position of the
    # first edge >= threshold, which is one past the index of the bin that
    # ends at that edge, hence the "- 1". The result is clamped so that a
    # threshold at or beyond either end of the edges maps to the first or
    # last bin instead of indexing out of range.
    cand_bin_idx = bisect.bisect_left(bins, candidate_threshold) - 1
    cand_bin_idx = min(max(cand_bin_idx, 0), len(fpr) - 1)
    ax.scatter([fpr[cand_bin_idx]], [tpr[cand_bin_idx]],
               c='b', s=50, alpha=.5, label='candidate')

    ax.grid(True)
    ax.set_xlabel('FPR')
    ax.set_ylabel('TPR')
    ax.legend(loc='lower right', fontsize=10)
|
|
|
|
|
def plot_precision_recall(ax, pos, neg, bins, candidate_threshold):
    """Draw a precision-recall curve and mark the candidate threshold.

    Precision (PPV) is placed on the x axis and recall (TPR) on the y axis.
    The cumulative distribution of ``neg`` is used as the true-negative rate
    and that of ``pos`` as the false-negative rate, i.e. samples below a
    threshold count as predicted negative.

    Args:
        ax: Axes-like object the curve is drawn on.
        pos: Samples of the positive class; ``len(pos)`` weights the TPR.
        neg: Samples of the negative class; ``len(neg)`` weights the FPR.
        bins: Bin edges passed to estimate_cumulative_probabilities; assumed
            to be a sorted sequence of edges (as generate_bins produces),
            since it is also searched with ``bisect``.
        candidate_threshold: Threshold whose operating point is highlighted.
    """
    _, tnr = estimate_cumulative_probabilities(neg, bins)
    _, fnr = estimate_cumulative_probabilities(pos, bins)

    tpr = 1 - fnr
    fpr = 1 - tnr

    # Scale the rates by the sample sizes to get (estimated) counts.
    true_pos = tpr * len(pos)
    predicted_pos = true_pos + fpr * len(neg)

    # Precision is undefined where nothing is predicted positive (this
    # happens at the top end, where both cumulatives reach 1). Leave those
    # entries as NaN instead of evaluating 0 / 0 and emitting a warning.
    ppv = np.full_like(predicted_pos, np.nan)
    np.divide(true_pos, predicted_pos, out=ppv, where=predicted_pos > 0)

    ax.plot(ppv, tpr, color='k', label='precision-recall curve')

    # The rate arrays hold one entry per bin, and entry i accumulates
    # everything up to edge i + 1. bisect_left returns the position of the
    # first edge >= threshold, which is one past the index of the bin that
    # ends at that edge, hence the "- 1". The result is clamped so that a
    # threshold at or beyond either end of the edges maps to the first or
    # last bin instead of indexing out of range.
    cand_bin_idx = bisect.bisect_left(bins, candidate_threshold) - 1
    cand_bin_idx = min(max(cand_bin_idx, 0), len(tpr) - 1)
    ax.scatter([ppv[cand_bin_idx]], [tpr[cand_bin_idx]],
               c='b', s=50, alpha=.5, label='candidate')

    ax.grid(True)
    ax.set_xlabel('PPV')
    ax.set_ylabel('TPR')
    ax.legend(loc='center left', fontsize=10)
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: load both samples, draw the four diagnostic panels
    # on a 2x2 grid and write the figure to disk.
    pos = np.loadtxt('pos.txt')
    neg = np.loadtxt('neg.txt')

    # Candidate decision threshold to highlight, and the histogram bin width.
    threshold = 37.7
    step = 1e-1

    bins = generate_bins(pos, neg, step)

    fig = plt.figure(figsize=(12, 8))

    # Panels are filled in subplot order 1..4.
    panel_painters = (
        plot_densities,
        plot_cumulatives,
        plot_roc,
        plot_precision_recall,
    )
    for slot, paint in enumerate(panel_painters, start=1):
        ax = fig.add_subplot(2, 2, slot)
        paint(ax, pos, neg, bins, threshold)

    fig.tight_layout()
    fig.savefig('plot.png', dpi=180)