alanf/experiment_simulation.py

## experiment_simulation.py
"""
Run a simulation of multiple concurrent A/B tests acting on a number of users.
Reports on the expected vs. the actual results, and raises a warning if the actual results would lead to the wrong conclusion.
Values of interest:
-- number_of_users : active users who may be part of an A/B test
-- number_of_concurrent_experiments: how many random concurrent experiments to generate
-- values in construct_random_experiments(): define a range and distribution for utility and B cohort percentage for each experiment
"""
import random

__author__ = 'alan fineberg alan@pocketgems.com'

# tweak these!
number_of_users = 100000
number_of_concurrent_experiments = 50

experiment_b_group_size = dict()
experiment_b_group_utility = dict()

def construct_random_experiments(num):
	id = 1
	for _ in range(num):
		# multiply * 100 for the percent
		experiment_b_group_size[id] = random.uniform(.001, .05)
		# this utility could indicate RPU or some other high level metric. Uniform distro is not necessarily the best choice
		experiment_b_group_utility[id] = random.randint(-100, 100)
		id *= 2

""" Generates a random cohort for all A/B tests based on the weights of each A/B test"""
def random_cohort():
	result = 0b0
	for experiment, liklihood in experiment_b_group_size.iteritems():
		if random.random() < liklihood:
			result |= experiment
	return result

""" Reports the total utility gleaned from a user in a single cohort """
def utility_from_experiment(cohort):
	total_utility = 0
	for experiment, utility in experiment_b_group_utility.iteritems():
		if cohort & experiment:
			total_utility += utility
	return total_utility

""" Reports on whether or not a single A/B test was a success. """
def analyze_single_experiment(user_to_cohort, experiment):
	A_total_utility = 0
	B_total_utility = 0

	users_in_A_cohort = 0
	users_in_B_cohort = 0

	for _, cohort in user_to_cohort.iteritems():
		if experiment & cohort:
			B_total_utility += utility_from_experiment(cohort)
			users_in_B_cohort += 1
		else:
			A_total_utility += utility_from_experiment(cohort)
			users_in_A_cohort += 1

	A_avg = A_total_utility * 1.0 / users_in_A_cohort
	if users_in_B_cohort:
		B_avg =  B_total_utility * 1.0 / users_in_B_cohort
	else:
		B_avg = 0

	expected_utility = experiment_b_group_utility[experiment]
	error =	abs(max(B_avg - expected_utility, expected_utility - B_avg))
	try:
		error_percent = abs(int(error / expected_utility * 100))
	except:
		error_percent = 'undefined'

	print '\n%s: %s users in B cohort.\n\t Utility: \n\t\tA cohort %s, \n\t\tB cohort %s' % (experiment, users_in_B_cohort, A_avg, B_avg)
	print '\t\texpected: %s \n\t\tobserved: %s \n\t\terror: %s%%' % (expected_utility, int(B_avg), error_percent)
	if B_avg > A_avg:
		print '\tconclusion: apply experiment %s' % experiment
	else:
		print '\tconclusion: don\'t apply experiment %s' % experiment
	if (B_avg >= 0 and expected_utility < 0) or (B_avg < 0 and expected_utility >= 0):
		print '>>> ALERT! ALERT! BAD ADVICE GIVEN. BAD! <<<'

if __name__ == '__main__':
	print 'running experiment for %s users' % number_of_users
	construct_random_experiments(number_of_concurrent_experiments)
	user_to_cohort = {}
	for i in xrange(number_of_users):
		user_to_cohort[i] = random_cohort()

	for experiment, _ in experiment_b_group_utility.iteritems():
		analyze_single_experiment(user_to_cohort, experiment)
	"""
	Run a simulation of multiple concurrent A/B tests acting on a number of users.
	Reports on the expected vs. the actual results, and raises a warning if the actual results would lead to the wrong conclusion.
	Values of interest:
	-- number_of_users : active users who may be part of an A/B test
	-- number_of_concurrent_experiments: how many random concurrent experiments to generate
	-- values in construct_random_experiments(): define a range and distribution for utility and B cohort percentage for each experiment
	"""
	import random

	__author__ = 'alan fineberg alan@pocketgems.com'

	# tweak these!
	number_of_users = 100000
	number_of_concurrent_experiments = 50

	experiment_b_group_size = dict()
	experiment_b_group_utility = dict()

	def construct_random_experiments(num):
	id = 1
	for _ in range(num):
	# multiply * 100 for the percent
	experiment_b_group_size[id] = random.uniform(.001, .05)
	# this utility could indicate RPU or some other high level metric. Uniform distro is not necessarily the best choice
	experiment_b_group_utility[id] = random.randint(-100, 100)
	id *= 2

	""" Generates a random cohort for all A/B tests based on the weights of each A/B test"""
	def random_cohort():
	result = 0b0
	for experiment, liklihood in experiment_b_group_size.iteritems():
	if random.random() < liklihood:
	result \|= experiment
	return result

	""" Reports the total utility gleaned from a user in a single cohort """
	def utility_from_experiment(cohort):
	total_utility = 0
	for experiment, utility in experiment_b_group_utility.iteritems():
	if cohort & experiment:
	total_utility += utility
	return total_utility

	""" Reports on whether or not a single A/B test was a success. """
	def analyze_single_experiment(user_to_cohort, experiment):
	A_total_utility = 0
	B_total_utility = 0

	users_in_A_cohort = 0
	users_in_B_cohort = 0

	for _, cohort in user_to_cohort.iteritems():
	if experiment & cohort:
	B_total_utility += utility_from_experiment(cohort)
	users_in_B_cohort += 1
	else:
	A_total_utility += utility_from_experiment(cohort)
	users_in_A_cohort += 1

	A_avg = A_total_utility * 1.0 / users_in_A_cohort
	if users_in_B_cohort:
	B_avg = B_total_utility * 1.0 / users_in_B_cohort
	else:
	B_avg = 0

	expected_utility = experiment_b_group_utility[experiment]
	error = abs(max(B_avg - expected_utility, expected_utility - B_avg))
	try:
	error_percent = abs(int(error / expected_utility * 100))
	except:
	error_percent = 'undefined'

	print '\n%s: %s users in B cohort.\n\t Utility: \n\t\tA cohort %s, \n\t\tB cohort %s' % (experiment, users_in_B_cohort, A_avg, B_avg)
	print '\t\texpected: %s \n\t\tobserved: %s \n\t\terror: %s%%' % (expected_utility, int(B_avg), error_percent)
	if B_avg > A_avg:
	print '\tconclusion: apply experiment %s' % experiment
	else:
	print '\tconclusion: don\'t apply experiment %s' % experiment
	if (B_avg >= 0 and expected_utility < 0) or (B_avg < 0 and expected_utility >= 0):
	print '>>> ALERT! ALERT! BAD ADVICE GIVEN. BAD! <<<'

	if __name__ == '__main__':
	print 'running experiment for %s users' % number_of_users
	construct_random_experiments(number_of_concurrent_experiments)
	user_to_cohort = {}
	for i in xrange(number_of_users):
	user_to_cohort[i] = random_cohort()

	for experiment, _ in experiment_b_group_utility.iteritems():
	analyze_single_experiment(user_to_cohort, experiment)