@ian-whitestone
Last active February 16, 2021 02:15
Bayesian conversion test A/A simulations
import numpy as np


def bayesian_rate_comparison(
    control_successes,
    control_samples,
    test_successes,
    test_samples,
    prior_a=0,
    prior_b=0,
):
    """Return P(test > control) and P(control > test) for two conversion rates.

    Draws from each group's Beta posterior, using a Beta(prior_a, prior_b) prior,
    and compares the draws directly.
    """
    num_draws = 50000
    # Posterior for the control conversion rate: Beta(successes + a, failures + b)
    control_posterior = np.random.beta(
        control_successes + prior_a,
        control_samples - control_successes + prior_b,
        size=num_draws,
    )
    # Posterior for the test conversion rate
    test_posterior = np.random.beta(
        test_successes + prior_a,
        test_samples - test_successes + prior_b,
        size=num_draws,
    )
    p_test_gt_control = (test_posterior > control_posterior).mean()
    p_control_gt_test = (control_posterior > test_posterior).mean()
    return p_test_gt_control, p_control_gt_test


conversion_rate = 0.2
num_sessions = 10000

# Informative prior centered on the true conversion rate, equivalent to
# 1,000 previously observed sessions: Beta(200, 800)
prior_strength = 1000
prior_probability = conversion_rate
prior_a, prior_b = prior_strength * prior_probability, prior_strength * (1 - prior_probability)

bayesian_probas = []
bayesian_probas_w_prior = []
for _ in range(5000):
    # A/A test: both "control" and "test" are drawn from the same distribution
    control = np.random.choice([0, 1], p=[1 - conversion_rate, conversion_rate], size=num_sessions)
    test = np.random.choice([0, 1], p=[1 - conversion_rate, conversion_rate], size=num_sessions)

    # Flat (uninformative) prior
    p_test_gt_control, p_control_gt_test = bayesian_rate_comparison(
        control.sum(), num_sessions, test.sum(), num_sessions
    )
    bayesian_probas.append((p_test_gt_control, p_control_gt_test))

    # Informative prior centered on the true rate
    p_test_gt_control, p_control_gt_test = bayesian_rate_comparison(
        control.sum(), num_sessions, test.sum(), num_sessions, prior_a, prior_b
    )
    bayesian_probas_w_prior.append((p_test_gt_control, p_control_gt_test))

for test_type, probas in {'with no priors': bayesian_probas, 'with priors': bayesian_probas_w_prior}.items():
    print(f"\nResults for bayesian test {test_type}")
    for threshold in [0.975, 0.85]:
        # A false positive: the test declares a winner in either direction even
        # though both groups share the same true conversion rate
        false_positives = [
            1
            for p_test_gt_control, p_control_gt_test in probas
            if p_test_gt_control >= threshold or p_control_gt_test >= threshold
        ]
        print(
            f"Detected {len(false_positives)} false positives in {len(probas)} simulated experiments "
            f"({len(false_positives) / len(probas):0.2%}) at {threshold:0.1%} threshold"
        )
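
As a cross-check (not part of the original gist), the same posterior comparison can be computed without Monte Carlo draws by integrating over the two Beta posteriors. This is a sketch assuming scipy is available; prob_test_beats_control is a hypothetical helper name, not something the gist defines.

from scipy import stats, integrate

def prob_test_beats_control(control_successes, control_samples, test_successes, test_samples, prior_a=1, prior_b=1):
    # P(test > control) = integral over x of f_control(x) * P(test rate > x)
    control_post = stats.beta(control_successes + prior_a, control_samples - control_successes + prior_b)
    test_post = stats.beta(test_successes + prior_a, test_samples - test_successes + prior_b)
    # Restrict the integration range to where the control posterior has mass,
    # so the adaptive quadrature doesn't miss the narrow peak
    lo, hi = control_post.ppf(1e-10), control_post.ppf(1 - 1e-10)
    p, _ = integrate.quad(lambda x: control_post.pdf(x) * test_post.sf(x), lo, hi)
    return p

# Example: should be close to the Monte Carlo estimate from bayesian_rate_comparison
# prob_test_beats_control(2000, 10000, 2030, 10000)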
ian-whitestone commented Feb 16, 2021

Results

Results for bayesian test with no priors
Detected 269 false positives in 5000 simulated experiments (5.38%) at 97.5% threshold
Detected 1520 false positives in 5000 simulated experiments (30.40%) at 85.0% threshold

Results for bayesian test with priors
Detected 218 false positives in 5000 simulated experiments (4.36%) at 97.5% threshold
Detected 1410 false positives in 5000 simulated experiments (28.20%) at 85.0% threshold
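
These rates are roughly what a back-of-envelope calculation predicts. Assuming P(test > control) is approximately uniform on (0, 1) under the null (an approximation, not something the gist itself asserts), a two-sided "call a winner" rule at threshold t flags about 2 * (1 - t) of A/A experiments:

for threshold in [0.975, 0.85]:
    # Expected A/A flag rate under the rough uniformity assumption
    print(f"Expected A/A flag rate at {threshold:0.1%} threshold: ~{2 * (1 - threshold):0.2%}")
# ~5.00% and ~30.00%, in line with the simulated 5.38% / 30.40% (no prior)
# and 4.36% / 28.20% (informative prior) above.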
