Skip to content

Instantly share code, notes, and snippets.

@DustinAlandzes
Last active October 22, 2019 21:50
Show Gist options
  • Save DustinAlandzes/c554c8af3abe72ceaa81a56830df6ade to your computer and use it in GitHub Desktop.
Save DustinAlandzes/c554c8af3abe72ceaa81a56830df6ade to your computer and use it in GitHub Desktop.
## simulate data for ab test
import numpy as np
from scipy.stats import beta # beta distribution
np.random.seed(42) # fixed seed so the simulated experiment is repeatable
# 1000 trials in both A and B groups
group_size = 1000
# one row of uniform [0, 1) draws per variant
A_group, B_group = np.random.rand(2, group_size)
# a trial counts as a success when its uniform draw falls below the variant's
# success rate; the boolean mask is summed by NumPy itself instead of being
# iterated element by element with the builtin sum()
A_successes = (A_group < 0.15).sum() # A variant has 15% success rate
B_successes = (B_group < 0.20).sum() # B variant has 20% success rate
A_failures = group_size - A_successes
B_failures = group_size - B_successes
# both variants share the same Beta(8, 42) prior; named once so the two
# posteriors cannot drift apart
prior_alpha, prior_beta = 8, 42
# A posterior = prior + A's data
A_posterior = beta(A_successes + prior_alpha,
                   A_failures + prior_beta)
# B posterior = prior + B's data
B_posterior = beta(B_successes + prior_alpha,
                   B_failures + prior_beta)
## Calculate a p-ish value with Monte Carlo Simulation
import pandas as pd
n_trials = 100000 # 100,000 trials
# draw 100k samples from A and B distributions; one rvs(size=...) call per
# variant generates the whole array at once instead of making 100,000
# separate Python-level rvs() calls
A_samples = pd.Series(A_posterior.rvs(size=n_trials))
B_samples = pd.Series(B_posterior.rvs(size=n_trials))
# how many times did B outperform A?
# (elementwise comparison of paired draws, counted with the Series' own sum)
B_wins = (B_samples > A_samples).sum()
# fraction of draws in which B wins
print(B_wins / n_trials)
## relative performance of B_samples / A_samples
# elementwise ratio of each B draw to the A draw at the same index;
# a value above 1 means B's draw was larger than A's
B_relative = B_samples.div(A_samples)
# histogram of the ratio
B_relative.hist()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment