Use to test concurrent A/B tests.
"""
Run a simulation of multiple concurrent A/B tests acting on a number of users.
Reports the expected vs. the actual results, and raises a warning if the actual
results would lead to the wrong conclusion.
Values of interest:
-- number_of_users: active users who may be part of an A/B test
-- number_of_concurrent_experiments: how many random concurrent experiments to generate
-- values in construct_random_experiments(): define a range and distribution for
   the utility and B-cohort percentage of each experiment
"""
import random
__author__ = 'alan fineberg alan@pocketgems.com'
# tweak these!
number_of_users = 100000
number_of_concurrent_experiments = 50
experiment_b_group_size = dict()
experiment_b_group_utility = dict()
def construct_random_experiments(num):
    """Create `num` experiments with random B-cohort sizes and utilities."""
    experiment_id = 1
    for _ in range(num):
        # Fraction of users assigned to the B cohort (multiply by 100 for the percent).
        experiment_b_group_size[experiment_id] = random.uniform(.001, .05)
        # This utility could indicate RPU or some other high-level metric;
        # a uniform distribution is not necessarily the best choice.
        experiment_b_group_utility[experiment_id] = random.randint(-100, 100)
        # Each experiment owns its own bit, so IDs are successive powers of two.
        experiment_id *= 2
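# A quick illustration of the bitmask scheme above (explanatory only, not part
# of the original simulation): with IDs 1, 2, 4, ..., a cohort integer's set
# bits mark which experiments a user is in the B group for. For example,
# cohort 0b0101 means B for experiments 1 and 4, but not experiment 2:
assert (0b0101 & 0b0100) and not (0b0101 & 0b0010)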
""" Generates a random cohort for all A/B tests based on the weights of each A/B test"""
def random_cohort():
result = 0b0
for experiment, liklihood in experiment_b_group_size.iteritems():
if random.random() < liklihood:
result |= experiment
return result
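# Each experiment's bit is drawn independently above, so one user can sit in
# the B groups of several experiments at once; showing how that overlap
# distorts each experiment's own A-vs-B readout is the point of the simulation.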
""" Reports the total utility gleaned from a user in a single cohort """
def utility_from_experiment(cohort):
total_utility = 0
for experiment, utility in experiment_b_group_utility.iteritems():
if cohort & experiment:
total_utility += utility
return total_utility
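# Worked example with assumed (non-random) utilities, explanatory only: if
# experiment 1 has utility -3 and experiment 4 has utility 7, a user in
# cohort 0b0101 contributes -3 + 7 = 4.
experiment_b_group_utility.update({1: -3, 4: 7})
assert utility_from_experiment(0b0101) == 4
experiment_b_group_utility.clear()  # leave the table empty for the real run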
""" Reports on whether or not a single A/B test was a success. """
def analyze_single_experiment(user_to_cohort, experiment):
A_total_utility = 0
B_total_utility = 0
users_in_A_cohort = 0
users_in_B_cohort = 0
for _, cohort in user_to_cohort.iteritems():
if experiment & cohort:
B_total_utility += utility_from_experiment(cohort)
users_in_B_cohort += 1
else:
A_total_utility += utility_from_experiment(cohort)
users_in_A_cohort += 1
A_avg = A_total_utility * 1.0 / users_in_A_cohort
if users_in_B_cohort:
B_avg = B_total_utility * 1.0 / users_in_B_cohort
else:
B_avg = 0
expected_utility = experiment_b_group_utility[experiment]
error = abs(max(B_avg - expected_utility, expected_utility - B_avg))
try:
error_percent = abs(int(error / expected_utility * 100))
except:
error_percent = 'undefined'
print '\n%s: %s users in B cohort.\n\t Utility: \n\t\tA cohort %s, \n\t\tB cohort %s' % (experiment, users_in_B_cohort, A_avg, B_avg)
print '\t\texpected: %s \n\t\tobserved: %s \n\t\terror: %s%%' % (expected_utility, int(B_avg), error_percent)
if B_avg > A_avg:
print '\tconclusion: apply experiment %s' % experiment
else:
print '\tconclusion: don\'t apply experiment %s' % experiment
if (B_avg >= 0 and expected_utility < 0) or (B_avg < 0 and expected_utility >= 0):
print '>>> ALERT! ALERT! BAD ADVICE GIVEN. BAD! <<<'
if __name__ == '__main__':
    print('running experiment for %s users' % number_of_users)
    construct_random_experiments(number_of_concurrent_experiments)
    user_to_cohort = {}
    for i in range(number_of_users):
        user_to_cohort[i] = random_cohort()
    for experiment in experiment_b_group_utility:
        analyze_single_experiment(user_to_cohort, experiment)
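# Example invocation (the file name is an assumption; save the gist under
# whatever name you like):
#   $ python3 ab_test_simulation.py
#   running experiment for 100000 users
#   ... one report per experiment, with an ALERT line whenever the observed
#   B-cohort average would flip the sign of the experiment's true utility ...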