-
-
Save kmdupr33/c61593d040234d50533a80d0ed7507a4 to your computer and use it in GitHub Desktop.
anthropic-error
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # %% | |
| import random | |
| # %% | |
def _baseline_record():
    """Draw one synthetic (passed, cheating_prob) record for the baseline model."""
    # The pass/fail coin is drawn before the probability, one draw each.
    passed = int(random.random() < 0.8)
    return passed, random.uniform(.05, .9)


def _upgraded_record(record):
    """Derive the second model's record from a baseline record.

    A coin is drawn for every record; only a baseline failure that also wins
    the coin flip is turned into a pass, with its cheating probability raised
    by a uniform amount and capped at 0.9. Every other record is reused as-is.
    """
    coin = random.random()
    if coin < 0.5 and record[0] != 1:
        return 1, min(.9, record[1] + random.uniform(.1, .65))
    return record


# 500 synthetic problems for the baseline model.
opus_4_6 = [_baseline_record() for _ in range(500)]
# Second model: same problems, with some baseline failures flipped to passes.
mythos = [_upgraded_record(record) for record in opus_4_6]
# %%
# Split the (result, cheating_prob) pairs into parallel per-model columns.
opus_4_6_results, opus_4_6_cheating_probs = (
    list(column) for column in zip(*opus_4_6)
)
mythos_results, mythos_cheating_probs = (
    list(column) for column in zip(*mythos)
)
# Sanity check shown as cell output: the 0.9 cap means this list stays empty.
list(filter(lambda prob: prob > .9, mythos_cheating_probs))
| # %% | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
# Bar width basis used when the threshold grid has no usable spacing (a single
# threshold, or all thresholds equal). Matches the default 51-point grid step.
_DEFAULT_BAR_SPACING = 0.02


def _pass_rate_curve(successes, mem_probs, thresholds):
    """Return (pass_rate, n_included) arrays, one entry per threshold, for one model."""
    pass_rate = np.zeros_like(thresholds, dtype=float)
    n_included = np.zeros_like(thresholds, dtype=int)
    for i, t in enumerate(thresholds):
        mask = mem_probs <= t
        n_included[i] = mask.sum()
        # With nothing included the rate stays at its 0 default (documented
        # behaviour) instead of taking the mean of an empty selection.
        if n_included[i] > 0:
            pass_rate[i] = successes[mask].mean()
    return pass_rate, n_included


def plot_swe_bench(
    opus_successes,
    mythos_successes,
    opus_memorization_probs,
    mythos_memorization_probs,
    thresholds=None,
    title="SWE-bench Verified",
):
    """
    Recreate the SWE-bench figure: pass rate vs memorization threshold.

    Each model has its own memorization probability estimates. At threshold t,
    a model's pass rate is computed only over problems with memorization
    probability <= t (for that model). When no problems are included for a
    model at a given threshold, its pass rate is reported as 0.

    The bottom panel shows how many problems are included at each threshold,
    using the mythos memorization probabilities (the primary filter for the
    model being evaluated).

    Parameters
    ----------
    opus_successes : array-like of 0/1, length N
    mythos_successes : array-like of 0/1, length N
    opus_memorization_probs : array-like of floats in [0, 1], length N
    mythos_memorization_probs : array-like of floats in [0, 1], length N
    thresholds : array-like of floats in [0, 1], optional
        Inclusion thresholds to evaluate. A scalar is treated as a single
        threshold. Defaults to 51 evenly spaced values from 0 to 1.
    title : str
        Title drawn above the top panel.

    Returns
    -------
    fig : matplotlib Figure
    (ax_top, ax_bot) : tuple of matplotlib Axes
        The pass-rate panel and the problems-included panel.

    Raises
    ------
    ValueError
        If the four input arrays differ in length, or if ``thresholds`` is
        empty or not one-dimensional.
    """
    opus = np.asarray(opus_successes, dtype=float)
    mythos = np.asarray(mythos_successes, dtype=float)
    opus_mem = np.asarray(opus_memorization_probs, dtype=float)
    mythos_mem = np.asarray(mythos_memorization_probs, dtype=float)

    lengths = {len(opus), len(mythos), len(opus_mem), len(mythos_mem)}
    if len(lengths) != 1:
        raise ValueError("All four arrays must be the same length.")

    if thresholds is None:
        thresholds = np.linspace(0, 1, 51)
    thresholds = np.atleast_1d(np.asarray(thresholds, dtype=float))
    # Validate before any figure is created so bad input fails fast and does
    # not leave a half-built figure behind.
    if thresholds.ndim != 1 or thresholds.size == 0:
        raise ValueError("thresholds must be a non-empty 1-D sequence.")

    opus_pass, _ = _pass_rate_curve(opus, opus_mem, thresholds)
    # The histogram tracks the mythos filter (the model under evaluation).
    mythos_pass, included = _pass_rate_curve(mythos, mythos_mem, thresholds)

    # Bar width comes from the smallest positive gap between thresholds, so a
    # single threshold or an unsorted/non-uniform grid cannot raise an
    # IndexError or yield a zero or negative width.
    spacing = _DEFAULT_BAR_SPACING
    if thresholds.size > 1:
        gaps = np.abs(np.diff(thresholds))
        positive_gaps = gaps[gaps > 0]
        if positive_gaps.size:
            spacing = positive_gaps.min()

    fig, (ax_top, ax_bot) = plt.subplots(
        2,
        1,
        figsize=(8, 8),
        sharex=True,
        gridspec_kw={"height_ratios": [3, 1], "hspace": 0.08},
    )

    mythos_color = "#E07856"
    opus_color = "#F2C14E"

    ax_top.plot(
        thresholds,
        mythos_pass * 100,
        marker="o",
        markersize=4,
        color=mythos_color,
        label="Claude Mythos Preview",
    )
    ax_top.plot(
        thresholds,
        opus_pass * 100,
        marker="o",
        markersize=4,
        color=opus_color,
        label="Claude Opus 4.6",
    )

    percent_ticks = [0, 20, 40, 60, 80, 100]
    ax_top.set_ylim(0, 100)
    ax_top.set_ylabel("Pass rate")
    ax_top.set_yticks(percent_ticks)
    ax_top.set_yticklabels([f"{v}%" for v in percent_ticks])
    ax_top.set_title(title, fontweight="bold")
    ax_top.legend(loc="upper center", bbox_to_anchor=(0.5, 1.15), ncol=2, frameon=False)
    ax_top.spines["top"].set_visible(False)
    ax_top.spines["right"].set_visible(False)

    ax_bot.bar(
        thresholds,
        included,
        width=spacing * 0.9,
        color="#B4B2A9",
        edgecolor="white",
        linewidth=0.5,
    )
    ax_bot.set_xlabel("Memorization probability inclusion threshold")
    ax_bot.set_ylabel("Problems included")
    ax_bot.set_xlim(0, 1)
    # Leave 5% headroom above the tallest bar; fall back to 1 when every bar
    # is empty so the axis keeps a non-zero height.
    tallest = included.max()
    ax_bot.set_ylim(0, tallest * 1.05 if tallest > 0 else 1)
    ax_bot.spines["top"].set_visible(False)
    ax_bot.spines["right"].set_visible(False)

    plt.tight_layout()
    return fig, (ax_top, ax_bot)
if __name__ == "__main__":
    # Script entry point: draw the comparison figure from the module-level
    # result and cheating-probability lists, then display it.
    plot_swe_bench(
        opus_successes=opus_4_6_results,
        mythos_successes=mythos_results,
        opus_memorization_probs=opus_4_6_cheating_probs,
        mythos_memorization_probs=mythos_cheating_probs,
    )
    plt.show()
| # %% | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment