Skip to content

Instantly share code, notes, and snippets.

@kmdupr33
Created April 26, 2026 13:39
Show Gist options
  • Select an option

  • Save kmdupr33/c61593d040234d50533a80d0ed7507a4 to your computer and use it in GitHub Desktop.

Select an option

Save kmdupr33/c61593d040234d50533a80d0ed7507a4 to your computer and use it in GitHub Desktop.
anthropic-error
# %%
import random
# %%
def _draw_baseline_outcome():
    """Simulate one Opus 4.6 problem as a ``(passed, probability)`` tuple.

    ``passed`` is 1 when a uniform draw falls below 0.8, otherwise 0. The
    probability score is drawn uniformly from [0.05, 0.9] in either case.
    """
    # The pass/fail draw must come before the score draw so the sequence of
    # random numbers consumed stays the same as a single inline expression.
    passed = 1 if random.random() < 0.8 else 0
    return (passed, random.uniform(.05, .9))


def _maybe_upgrade(baseline):
    """Derive the Mythos outcome for one problem from its baseline outcome.

    A coin flip is drawn for every problem. Only a baseline failure that also
    wins the flip is replaced: it becomes a pass and its score is raised by a
    uniform [0.1, 0.65] bump, capped at 0.9. In every other case the very
    same baseline tuple is returned unchanged.
    """
    won_coin_flip = random.random() < 0.5
    if not won_coin_flip or baseline[0] == 1:
        return baseline
    bumped_score = baseline[1] + random.uniform(.1, .65)
    return (1, min(.9, bumped_score))


# 500 synthetic problems for the baseline model, then the derived model.
opus_4_6 = [_draw_baseline_outcome() for _ in range(500)]
mythos = [_maybe_upgrade(outcome) for outcome in opus_4_6]
# %%
# Split the (passed, score) pairs into parallel per-model lists.
opus_4_6_results = [passed for passed, _score in opus_4_6]
opus_4_6_cheating_probs = [score for _passed, score in opus_4_6]
mythos_results = [passed for passed, _score in mythos]
mythos_cheating_probs = [score for _passed, score in mythos]
# Sanity check for interactive use: lists any Mythos score above the 0.9 cap.
[score for score in mythos_cheating_probs if score > .9]
# %%
import numpy as np
import matplotlib.pyplot as plt
def _pass_rate_curve(successes, memorization_probs, thresholds):
    """Compute pass rate and included-problem count at each threshold.

    For every threshold ``t`` the problems with ``memorization_probs <= t``
    are selected. The pass rate is the mean of ``successes`` over that
    selection, or 0 when the selection is empty.

    Parameters
    ----------
    successes : np.ndarray of float, length N
    memorization_probs : np.ndarray of float, length N
    thresholds : np.ndarray of float, 1-D

    Returns
    -------
    rates : np.ndarray of float, same shape as ``thresholds``
    counts : np.ndarray of int, same shape as ``thresholds``
    """
    rates = np.zeros_like(thresholds, dtype=float)
    counts = np.zeros_like(thresholds, dtype=int)
    for i, t in enumerate(thresholds):
        mask = memorization_probs <= t
        counts[i] = mask.sum()
        # Leave the rate at 0 for an empty selection instead of taking the
        # mean of nothing (which would yield NaN and a runtime warning).
        if counts[i] > 0:
            rates[i] = successes[mask].mean()
    return rates, counts


def plot_swe_bench(
    opus_successes,
    mythos_successes,
    opus_memorization_probs,
    mythos_memorization_probs,
    thresholds=None,
    title="SWE-bench Verified",
):
    """
    Recreate the SWE-bench figure: pass rate vs memorization threshold.

    Each model has its own memorization probability estimates. At threshold t,
    a model's pass rate is computed only over problems with memorization
    probability <= t (for that model). When no problems are included for a
    model at a given threshold, its pass rate is reported as 0.

    The bottom panel shows how many problems are included at each threshold,
    using the mythos memorization probabilities (the primary filter for the
    model being evaluated).

    Parameters
    ----------
    opus_successes : array-like of 0/1, length N
    mythos_successes : array-like of 0/1, length N
    opus_memorization_probs : array-like of floats in [0, 1], length N
    mythos_memorization_probs : array-like of floats in [0, 1], length N
    thresholds : array-like of floats in [0, 1], optional
        Inclusion thresholds for the x axis. Defaults to 51 evenly spaced
        values from 0 to 1. A single scalar is accepted and treated as a
        one-element sequence.
    title : str
        Title drawn above the top panel.

    Returns
    -------
    fig : matplotlib Figure
    (ax_top, ax_bot) : tuple of Axes
        The pass-rate panel and the problems-included panel.

    Raises
    ------
    ValueError
        If the four input arrays differ in length, or if ``thresholds`` is
        empty or not one-dimensional.
    """
    opus = np.asarray(opus_successes, dtype=float)
    mythos = np.asarray(mythos_successes, dtype=float)
    opus_mem = np.asarray(opus_memorization_probs, dtype=float)
    mythos_mem = np.asarray(mythos_memorization_probs, dtype=float)

    lengths = {len(opus), len(mythos), len(opus_mem), len(mythos_mem)}
    if len(lengths) != 1:
        raise ValueError("All four arrays must be the same length.")

    if thresholds is None:
        thresholds = np.linspace(0, 1, 51)
    thresholds = np.atleast_1d(np.asarray(thresholds, dtype=float))
    # Validate before any figure is created so bad input fails fast with a
    # clear message instead of an IndexError deep inside the plotting code.
    if thresholds.ndim != 1 or thresholds.size == 0:
        raise ValueError("thresholds must be a non-empty 1-D sequence.")

    opus_pass, _ = _pass_rate_curve(opus, opus_mem, thresholds)
    # The bar panel tracks the mythos filter (the model under evaluation).
    mythos_pass, included = _pass_rate_curve(mythos, mythos_mem, thresholds)

    fig, (ax_top, ax_bot) = plt.subplots(
        2,
        1,
        figsize=(8, 8),
        sharex=True,
        gridspec_kw={"height_ratios": [3, 1], "hspace": 0.08},
    )

    mythos_color = "#E07856"
    opus_color = "#F2C14E"

    ax_top.plot(
        thresholds,
        mythos_pass * 100,
        marker="o",
        markersize=4,
        color=mythos_color,
        label="Claude Mythos Preview",
    )
    ax_top.plot(
        thresholds,
        opus_pass * 100,
        marker="o",
        markersize=4,
        color=opus_color,
        label="Claude Opus 4.6",
    )

    percent_ticks = [0, 20, 40, 60, 80, 100]
    ax_top.set_ylim(0, 100)
    ax_top.set_ylabel("Pass rate")
    ax_top.set_yticks(percent_ticks)
    ax_top.set_yticklabels([f"{v}%" for v in percent_ticks])
    ax_top.set_title(title, fontweight="bold")
    ax_top.legend(loc="upper center", bbox_to_anchor=(0.5, 1.15), ncol=2, frameon=False)
    ax_top.spines["top"].set_visible(False)
    ax_top.spines["right"].set_visible(False)

    # Bar width follows the spacing of the first two thresholds. With a single
    # threshold there is no spacing to measure, so fall back to the spacing of
    # the default 51-point grid (1 / 50).
    spacing = thresholds[1] - thresholds[0] if thresholds.size > 1 else 0.02
    ax_bot.bar(
        thresholds,
        included,
        width=spacing * 0.9,
        color="#B4B2A9",
        edgecolor="white",
        linewidth=0.5,
    )
    ax_bot.set_xlabel("Memorization probability inclusion threshold")
    ax_bot.set_ylabel("Problems included")
    ax_bot.set_xlim(0, 1)
    peak = included.max()
    # Keep a non-degenerate y range even when no problem is ever included.
    ax_bot.set_ylim(0, peak * 1.05 if peak > 0 else 1)
    ax_bot.spines["top"].set_visible(False)
    ax_bot.spines["right"].set_visible(False)

    # Lay out this specific figure rather than whatever pyplot considers
    # the current one.
    fig.tight_layout()
    return fig, (ax_top, ax_bot)
if __name__ == "__main__":
    # Demo: plot the synthetic results generated earlier in this file.
    # The "cheating" scores serve as the memorization probabilities.
    plot_swe_bench(
        opus_successes=opus_4_6_results,
        mythos_successes=mythos_results,
        opus_memorization_probs=opus_4_6_cheating_probs,
        mythos_memorization_probs=mythos_cheating_probs,
    )
    plt.show()
# %%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment