Created
April 14, 2022 11:51
-
-
Save iqbalali/0ed8374185e3acd45a4b720ae5bc2130 to your computer and use it in GitHub Desktop.
Simulate SRM of A/B tests across various traffic volumes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from random import randint | |
from scipy.stats import chisquare | |
import statistics | |
import pandas as pd | |
def traffic_sim(num):
    """Simulate ``num`` visitors being split at random between two variants.

    One ``randint(0, 1)`` draw is made per visitor: a 1 is counted for the
    first variant and a 0 for the second.

    Args:
        num: Number of visitors to simulate.

    Returns:
        A ``(first, second)`` tuple of visitor counts. The two counts add up
        to ``num``; the result is ``(0, 0)`` when ``num`` is not positive.
    """
    # tally[0] collects the 0-draws, tally[1] the 1-draws.
    tally = [0, 0]
    for _ in range(num):
        tally[randint(0, 1)] += 1
    return (tally[1], tally[0])
def srm_check(tup, alpha=0.01):
    """Run a chi-square sample-ratio-mismatch (SRM) check on group counts.

    The groups are treated as having been allocated equal shares of traffic,
    so the expected count for each group is the total divided by the number
    of groups.

    Args:
        tup: Observed visitor counts, one entry per variant, e.g. ``(a, b)``.
        alpha: Significance threshold. A mismatch is flagged when the
            chi-square p-value is strictly below this value. Defaults to
            0.01.

    Returns:
        A dict with two keys:
        ``'srm'``: bool, True when a mismatch was flagged.
        ``'diff'``: the gap between the largest and the smallest group,
        expressed as a percentage of the expected per-group count.
        When the counts sum to zero the result is
        ``{'srm': False, 'diff': 0.0}``.
    """
    total = sum(tup)
    if total == 0:
        # Nothing was observed, so there is no ratio to test, and dividing by
        # the expected count below would raise ZeroDivisionError.
        return {"srm": False, 'diff': 0.0}

    # Equal allocation: this is total / 2 for the two-variant case and stays
    # correct for any other number of variants, so the expected frequencies
    # always sum to the observed total as chisquare requires.
    expected = total / len(tup)
    perc_diff = ((max(tup) - min(tup)) / expected) * 100

    # Index 1 of the chisquare result is the p-value.
    p_value = chisquare(tup, f_exp=expected)[1]
    return {"srm": bool(p_value < alpha), 'diff': perc_diff}
def get_samples(traffic, samples):
    """Simulate ``samples`` experiments of ``traffic`` visitors each.

    Every experiment is generated with ``traffic_sim`` and then evaluated
    with ``srm_check``.

    Args:
        traffic: Number of visitors per simulated experiment.
        samples: Number of experiments to simulate.

    Returns:
        The list of ``srm_check`` result dicts, ordered by ascending
        ``'diff'`` value.
    """
    outcomes = [srm_check(traffic_sim(traffic)) for _ in range(samples)]
    outcomes.sort(key=lambda outcome: outcome['diff'])
    return outcomes
def get_false_positive_rate(list_dictionary):
    """Return the percentage of results whose ``'srm'`` flag is set.

    The name reflects how this file uses it: the results come from simulated
    random splits, so every flagged result is counted as a false positive.

    Args:
        list_dictionary: List of result dicts, each with an ``'srm'`` key.

    Returns:
        The share of flagged results as a percentage (0-100). An empty list
        yields 0.0 rather than raising ZeroDivisionError, in line with the
        zero fallback used by ``get_max_diff``.
    """
    if not list_dictionary:
        return 0.0
    flagged = sum(1 for item in list_dictionary if item['srm'])
    return (flagged / len(list_dictionary)) * 100
def get_max_diff(list_dictionary, srm_value):
    """Return the largest ``'diff'`` among results with a given SRM flag.

    Args:
        list_dictionary: List of result dicts with ``'srm'`` and ``'diff'``
            keys.
        srm_value: Only results whose ``'srm'`` equals this value are
            considered.

    Returns:
        The maximum matching ``'diff'``, or 0 when nothing matches.
    """
    # The leading 0 doubles as the fallback for "no matching results" and as
    # a floor for the maximum.
    candidates = [0] + [
        entry['diff'] for entry in list_dictionary if entry['srm'] == srm_value
    ]
    return max(candidates)
def get_mean_diff(list_dictionary):
    """Return the arithmetic mean of the ``'diff'`` values.

    Args:
        list_dictionary: List of result dicts, each with a ``'diff'`` key.

    Returns:
        The mean ``'diff'`` across all entries. An empty list yields 0.0
        rather than raising ZeroDivisionError, in line with the zero
        fallback used by ``get_max_diff``.
    """
    if not list_dictionary:
        return 0.0
    return sum(item['diff'] for item in list_dictionary) / len(list_dictionary)
def get_stdev(list_dictionary):
    """Return the sample standard deviation of the ``'diff'`` values.

    Args:
        list_dictionary: List of result dicts, each with a ``'diff'`` key.

    Returns:
        The result of ``statistics.stdev`` over every entry's ``'diff'``.

    Raises:
        statistics.StatisticsError: If fewer than two entries are supplied.
    """
    return statistics.stdev([entry['diff'] for entry in list_dictionary])
def get_dataframe(traffic, samples):
    """Build a summary table of simulated SRM checks per traffic volume.

    Args:
        traffic: Iterable of total visitor counts; one table row is produced
            for each entry.
        samples: Number of simulated experiments per traffic volume.

    Returns:
        A ``pandas.DataFrame`` with one row per entry of ``traffic``.
        Percentage columns are strings ending in ``'%'``; the standard
        deviation column is numeric.
    """
    def as_percent(value):
        # Percentages are rendered as plain strings with a trailing '%'.
        return str(value) + '%'

    rows = []
    for volume in traffic:
        outcomes = get_samples(volume, samples)
        row = {'Total traffic': volume}
        row['SRM Type I rate'] = as_percent(get_false_positive_rate(outcomes))
        # NOTE(review): despite the "(SRM=False)" label, this average (and the
        # standard deviation below) is computed over ALL samples, not only
        # the ones without an SRM flag. Confirm which was intended.
        row['Avg diff (SRM=False)'] = as_percent(get_mean_diff(outcomes))
        row['Standard deviation'] = get_stdev(outcomes)
        row['Max diff (SRM=False)'] = as_percent(get_max_diff(outcomes, False))
        row['Max diff (SRM=True)'] = as_percent(get_max_diff(outcomes, True))
        rows.append(row)
    return pd.DataFrame(rows)
# Build the summary table for total traffic volumes from 1,000 to 10,000
# visitors, in steps of 1,000.
# Each volume is simulated `samples` times, standing in for that many
# independent experiments.
# The returned DataFrame is neither assigned nor printed here.
get_dataframe(traffic=list(range(1000, 10001, 1000)), samples=10000)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment