# pavlin-policar/confidence_histogram.py

## confidence_histogram.py
def confidence_histogram(y_true, y_probs, n_bins=10, ax=None):
    """Draw a reliability diagram: per-bin accuracy against confidence.

    Each sample's confidence is its largest predicted probability and its
    prediction is the column index of that probability. Samples are grouped
    into ``n_bins`` equal-width confidence bins on [0, 1]; bin ``i`` covers
    ``[i / n_bins, (i + 1) / n_bins)`` and the last bin also takes
    everything at or above its left edge (so a confidence of 1.0 is kept).
    For every bin the mean accuracy is drawn as a bar, and the difference
    between mean confidence and mean accuracy is stacked on top of it as a
    hatched "Gap" bar. Bins without samples are drawn with zero height.

    Parameters
    ----------
    y_true : array-like
        True class index of each sample, compared against the arg-max column
        of ``y_probs``. Flattened to one dimension before the comparison so
        that a column vector does not broadcast into a 2-D comparison.
    y_probs : array-like of shape (n_samples, n_classes)
        Predicted probabilities, one row per sample.
    n_bins : int, default 10
        Number of equal-width confidence bins.
    ax : matplotlib Axes, optional
        Axes to draw on. When ``None`` a new figure with
        ``figsize=(4, 4)`` is created.

    Returns
    -------
    ax
        The axes the diagram was drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(4, 4))

    # Accept plain (nested) lists as well as arrays; boolean-mask indexing
    # below requires real arrays.
    y_probs = np.asarray(y_probs)
    y_true = np.asarray(y_true).ravel()

    confidences = np.max(y_probs, axis=1)
    predictions = np.argmax(y_probs, axis=1)
    accuracies = predictions == y_true

    # Left edges of the bins. Only the interior edges are handed to
    # ``digitize`` so the returned indices fall in ``0 .. n_bins - 1``.
    bins = np.linspace(0, 1 - 1 / n_bins, n_bins)
    bin_indices = np.digitize(confidences, bins=bins[1:])
    bin_accuracy = np.zeros(n_bins, dtype=np.float64)
    bin_confidence = np.zeros(n_bins, dtype=np.float64)
    for idx in np.unique(bin_indices):
        mask = bin_indices == idx
        bin_accuracy[idx] = np.mean(accuracies[mask])
        bin_confidence[idx] = np.mean(confidences[mask])

    ax.grid(zorder=1, linestyle="dotted")
    # Derive the width from n_bins rather than from neighbouring edges so a
    # single bin (n_bins=1) does not index past the end of ``bins``.
    width = 1 / n_bins
    confs = ax.bar(bins, bin_accuracy, width=width, edgecolor="k", align="edge", zorder=2)
    gaps = ax.bar(
        bins, bin_confidence - bin_accuracy, bottom=bin_accuracy, color=[1, 0.7, 0.7],
        alpha=0.5, width=width, hatch="//", edgecolor="r", align="edge", zorder=2,
    )
    # Diagonal reference line: accuracy equal to confidence.
    ax.plot([0, 1], [0, 1], c="gray", linestyle="dashed")

    ax.legend([confs, gaps], ["Outputs", "Gap"], loc="best", fontsize="small")

    # Clean up
    ax.set_ylabel("Accuracy")
    ax.set_xlabel("Confidence")
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)

    return ax
	def confidence_histogram(y_true, y_probs, n_bins=10, ax=None):
	    """Draw a reliability diagram: per-bin accuracy against confidence.

	    Each sample's confidence is its largest predicted probability and its
	    prediction is the column index of that probability. Samples are grouped
	    into ``n_bins`` equal-width confidence bins on [0, 1]; bin ``i`` covers
	    ``[i / n_bins, (i + 1) / n_bins)`` and the last bin also takes
	    everything at or above its left edge (so a confidence of 1.0 is kept).
	    For every bin the mean accuracy is drawn as a bar, and the difference
	    between mean confidence and mean accuracy is stacked on top of it as a
	    hatched "Gap" bar. Bins without samples are drawn with zero height.

	    Parameters
	    ----------
	    y_true : array-like
	        True class index of each sample, compared against the arg-max column
	        of ``y_probs``. Flattened to one dimension before the comparison so
	        that a column vector does not broadcast into a 2-D comparison.
	    y_probs : array-like of shape (n_samples, n_classes)
	        Predicted probabilities, one row per sample.
	    n_bins : int, default 10
	        Number of equal-width confidence bins.
	    ax : matplotlib Axes, optional
	        Axes to draw on. When ``None`` a new figure with
	        ``figsize=(4, 4)`` is created.

	    Returns
	    -------
	    ax
	        The axes the diagram was drawn on.
	    """
	    if ax is None:
	        _, ax = plt.subplots(figsize=(4, 4))

	    # Accept plain (nested) lists as well as arrays; boolean-mask indexing
	    # below requires real arrays.
	    y_probs = np.asarray(y_probs)
	    y_true = np.asarray(y_true).ravel()

	    confidences = np.max(y_probs, axis=1)
	    predictions = np.argmax(y_probs, axis=1)
	    accuracies = predictions == y_true

	    # Left edges of the bins. Only the interior edges are handed to
	    # ``digitize`` so the returned indices fall in ``0 .. n_bins - 1``.
	    bins = np.linspace(0, 1 - 1 / n_bins, n_bins)
	    bin_indices = np.digitize(confidences, bins=bins[1:])
	    bin_accuracy = np.zeros(n_bins, dtype=np.float64)
	    bin_confidence = np.zeros(n_bins, dtype=np.float64)
	    for idx in np.unique(bin_indices):
	        mask = bin_indices == idx
	        bin_accuracy[idx] = np.mean(accuracies[mask])
	        bin_confidence[idx] = np.mean(confidences[mask])

	    ax.grid(zorder=1, linestyle="dotted")
	    # Derive the width from n_bins rather than from neighbouring edges so a
	    # single bin (n_bins=1) does not index past the end of ``bins``.
	    width = 1 / n_bins
	    confs = ax.bar(bins, bin_accuracy, width=width, edgecolor="k", align="edge", zorder=2)
	    gaps = ax.bar(
	        bins, bin_confidence - bin_accuracy, bottom=bin_accuracy, color=[1, 0.7, 0.7],
	        alpha=0.5, width=width, hatch="//", edgecolor="r", align="edge", zorder=2,
	    )
	    # Diagonal reference line: accuracy equal to confidence.
	    ax.plot([0, 1], [0, 1], c="gray", linestyle="dashed")

	    ax.legend([confs, gaps], ["Outputs", "Gap"], loc="best", fontsize="small")

	    # Clean up
	    ax.set_ylabel("Accuracy")
	    ax.set_xlabel("Confidence")
	    ax.set_xlim(0, 1)
	    ax.set_ylim(0, 1)

	    return ax