Skip to content

Instantly share code, notes, and snippets.

@iqbalali
Created April 14, 2022 11:51
Show Gist options
  • Save iqbalali/0ed8374185e3acd45a4b720ae5bc2130 to your computer and use it in GitHub Desktop.
Save iqbalali/0ed8374185e3acd45a4b720ae5bc2130 to your computer and use it in GitHub Desktop.
Simulate sample ratio mismatch (SRM) checks for A/B tests across various traffic volumes
from random import randint
from scipy.stats import chisquare
import statistics
import pandas as pd
def traffic_sim(num):
    """Simulate a 50/50 split of ``num`` visitors between two variants.

    One ``randint(0, 1)`` draw is made per visitor; a draw of 1 assigns the
    visitor to the first variant, a draw of 0 to the second.

    Args:
        num: Number of visitors to assign.

    Returns:
        A tuple ``(a, b)`` with the visitor count of each variant.
    """
    draws = [randint(0, 1) for _ in range(num)]
    first_variant = sum(draws)
    # Every draw that was not a 1 went to the second variant.
    second_variant = len(draws) - first_variant
    return (first_variant, second_variant)
def srm_check(tup, alpha=0.01):
    """Check one traffic split for a sample ratio mismatch (SRM).

    The observed per-variant counts are compared against an equal allocation
    with a chi-square goodness-of-fit test.

    Args:
        tup: Visitor counts per variant, e.g. ``(a, b)``.
        alpha: Significance threshold; the split is flagged as SRM when the
            chi-square p-value is below it. Defaults to 0.01.

    Returns:
        A dict with two keys:
            'srm': ``True`` when the p-value is below ``alpha``.
            'diff': gap between the largest and smallest variant, as a
                percentage of the expected per-variant count.
    """
    total_traffic = sum(tup)
    if total_traffic == 0:
        # No visitors: there is no split to test, and the percentage below
        # would divide by zero.
        return {"srm": False, 'diff': 0.0}

    # Equal allocation across however many variants were supplied.
    expected = total_traffic / len(tup)
    perc_diff = ((max(tup) - min(tup)) / expected) * 100
    p_value = chisquare(tup, f_exp=expected)[1]
    # bool() keeps the flag a plain Python bool rather than a NumPy scalar.
    return {"srm": bool(p_value < alpha), 'diff': perc_diff}
def get_samples(traffic, samples):
    """Run repeated simulated experiments and collect their SRM checks.

    Args:
        traffic: Number of visitors in each simulated experiment.
        samples: Number of experiments to simulate.

    Returns:
        A list of ``srm_check`` result dicts, ordered by ascending 'diff'.
    """
    outcomes = [srm_check(traffic_sim(traffic)) for _ in range(samples)]
    outcomes.sort(key=lambda outcome: outcome['diff'])
    return outcomes
def get_false_positive_rate(list_dictionary):
    """Return the percentage of results that were flagged as SRM.

    Args:
        list_dictionary: List of result dicts, each with an 'srm' flag.

    Returns:
        The share of entries whose 'srm' flag is set, as a float percentage
        (0-100). An empty list yields 0.0 instead of dividing by zero.
    """
    if not list_dictionary:
        return 0.0
    flagged = sum(1 for item in list_dictionary if item['srm'])
    return (flagged / len(list_dictionary)) * 100
def get_max_diff(list_dictionary, srm_value):
    """Return the largest 'diff' among results with the given SRM flag.

    Args:
        list_dictionary: List of result dicts with 'srm' and 'diff' keys.
        srm_value: The 'srm' value an entry must equal to be considered.

    Returns:
        The maximum 'diff' of the matching entries, with 0 as the floor:
        0 is returned when nothing matches.
    """
    matching = [
        entry['diff']
        for entry in list_dictionary
        if entry['srm'] == srm_value
    ]
    # The leading 0 keeps max() defined when no entry matches.
    return max([0] + matching)
def get_mean_diff(list_dictionary):
    """Return the arithmetic mean of the 'diff' values.

    Args:
        list_dictionary: List of result dicts, each with a 'diff' key.

    Returns:
        The mean 'diff' over all entries. An empty list yields 0.0 instead
        of dividing by zero.
    """
    diffs = [item['diff'] for item in list_dictionary]
    if not diffs:
        return 0.0
    return sum(diffs) / len(diffs)
def get_stdev(list_dictionary):
    """Return the sample standard deviation of the 'diff' values.

    Args:
        list_dictionary: List of result dicts, each with a 'diff' key.

    Returns:
        ``statistics.stdev`` of the 'diff' values. With fewer than two
        entries the sample standard deviation is undefined and
        ``statistics.stdev`` would raise, so 0.0 is returned instead.
    """
    diffs = [item['diff'] for item in list_dictionary]
    if len(diffs) < 2:
        return 0.0
    return statistics.stdev(diffs)
def get_dataframe(traffic, samples):
    """Summarise simulated SRM checks for each traffic volume.

    Args:
        traffic: Iterable of total-traffic volumes to simulate.
        samples: Number of simulated experiments per volume.

    Returns:
        A pandas DataFrame with one row per traffic volume.
    """
    def as_percent(value):
        return f"{value!s}%"

    def summarise(volume):
        outcomes = get_samples(volume, samples)
        # NOTE(review): the average and standard deviation below are computed
        # over every sample, although the average's label says "SRM=False".
        return {
            'Total traffic': volume,
            'SRM Type I rate': as_percent(get_false_positive_rate(outcomes)),
            'Avg diff (SRM=False)': as_percent(get_mean_diff(outcomes)),
            'Standard deviation': get_stdev(outcomes),
            'Max diff (SRM=False)': as_percent(get_max_diff(outcomes, False)),
            'Max diff (SRM=True)': as_percent(get_max_diff(outcomes, True)),
        }

    return pd.DataFrame([summarise(volume) for volume in traffic])
# Simulate the A/B traffic split for every total-traffic volume in `traffic`.
# Each volume is simulated `samples` times, mimicking that many experiments.
# The returned DataFrame is not assigned to a name; presumably this is meant
# to be the last expression of a notebook cell so the table is displayed.
get_dataframe(traffic=[1000,2000,3000,4000,5000,6000,7000,8000,9000,10000], samples=10000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment