# iqbalali/ssrm-false-positive.py

## ssrm-false-positive.py
from random import randint
from scipy.stats import chisquare

def traffic_sim(increment, checks):
    """Simulate cumulative traffic for a two-arm experiment with a fair split.

    Every visitor is assigned to one of two arms by a coin flip
    (``randint(0, 1)``).  After each batch of ``increment`` visitors the
    running totals of both arms are recorded as a snapshot.

    Args:
        increment: Number of visitors added between consecutive snapshots.
        checks: Number of snapshots to record.

    Returns:
        A list of ``checks`` tuples ``(a, b)`` with the cumulative visitor
        counts of the two arms at each snapshot.
    """
    snapshots = []
    arm_a = arm_b = 0
    for _ in range(checks):
        # One coin flip per visitor; a 1 sends the visitor to arm A.
        flips = [randint(0, 1) for _ in range(increment)]
        ones = sum(flips)
        arm_a += ones
        arm_b += len(flips) - ones
        snapshots.append((arm_a, arm_b))
    return snapshots

def srm_check(tup, alpha=0.01):
    """Test a pair of arm counts for sample ratio mismatch (SRM).

    Runs a chi-square goodness-of-fit test of the observed counts against an
    even split between the two arms and flags SRM when the p-value is below
    ``alpha``.

    Args:
        tup: Observed visitor counts of the two arms, e.g. ``(a, b)``.
        alpha: Significance threshold applied to the p-value.  Defaults to
            0.01.

    Returns:
        A dict with two entries: ``'srm'`` is ``True`` when the p-value is
        below ``alpha``; ``'diff'`` is the gap between the larger and the
        smaller arm as a percentage of the expected per-arm count.
    """
    total_traffic = sum(tup)
    if total_traffic == 0:
        # No traffic yet: the percentage difference would divide by zero and
        # the chi-square test is undefined, so report "no mismatch".
        return {"srm": False, 'diff': 0}
    expected = total_traffic / 2
    perc_diff = ((max(tup) - min(tup)) / expected) * 100
    # Index 1 of the chisquare result is the p-value.
    p_value = chisquare(tup, f_exp=expected)[1]
    return {"srm": bool(p_value < alpha), 'diff': perc_diff}

def get_false_positive_rate(list_dictionary):
    """Return the percentage of results that were flagged as SRM.

    Args:
        list_dictionary: List of result dicts, each carrying a boolean
            ``'srm'`` entry.

    Returns:
        The share of entries whose ``'srm'`` flag is set, as a percentage
        between 0 and 100.  An empty list yields ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    if not list_dictionary:
        return 0.0
    flagged = sum(1 for item in list_dictionary if item['srm'])
    return (flagged / len(list_dictionary)) * 100

def get_false_pos_incremental(samples, increment, checks):
    """Estimate the false-positive rate of repeatedly checking for SRM.

    Simulates ``samples`` independent experiments via ``traffic_sim``.  Each
    experiment is tested with ``srm_check`` at every one of its ``checks``
    cumulative snapshots, and counts as a false positive when at least one
    snapshot is flagged.

    Args:
        samples: Number of simulated experiments.
        increment: Visitors added between consecutive checks.
        checks: Number of checks performed per experiment.

    Returns:
        The percentage of experiments flagged at least once, as computed by
        ``get_false_positive_rate``.
    """
    list_dictionary = []
    for _ in range(samples):
        outcome = {'srm': False, 'diff': 0}
        for snapshot in traffic_sim(increment, checks):
            # Run the test once per snapshot and reuse its result.
            result = srm_check(snapshot)
            if result['srm']:
                # One flagged check already decides this experiment; later
                # snapshots cannot change the reported rate.
                outcome = result
                break
        list_dictionary.append(outcome)
    return get_false_positive_rate(list_dictionary)

# Simulate 10,000 experimenters, each checking their site for SRM 1,000 times.
# Each time they check, the traffic volume has incremented by 100.
# The guard keeps this long-running simulation from starting on import, and
# the result is printed so a script run actually reports the rate.
if __name__ == "__main__":
    print(get_false_pos_incremental(samples=10000, increment=100, checks=1000))
	from random import randint
	from scipy.stats import chisquare

	def traffic_sim(increment, checks):
	    """Simulate cumulative traffic for a two-arm experiment with a fair split.

	    Every visitor is assigned to one of two arms by a coin flip
	    (``randint(0, 1)``).  After each batch of ``increment`` visitors the
	    running totals of both arms are recorded as a snapshot.

	    Args:
	        increment: Number of visitors added between consecutive snapshots.
	        checks: Number of snapshots to record.

	    Returns:
	        A list of ``checks`` tuples ``(a, b)`` with the cumulative visitor
	        counts of the two arms at each snapshot.
	    """
	    traffic = []
	    a, b = 0, 0
	    for _ in range(checks):
	        for _ in range(increment):
	            # A 1 sends the visitor to the first arm, a 0 to the second.
	            if randint(0, 1) == 1:
	                a += 1
	            else:
	                b += 1
	        traffic.append((a, b))
	    return traffic

	def srm_check(tup, alpha=0.01):
	    """Test a pair of arm counts for sample ratio mismatch (SRM).

	    Runs a chi-square goodness-of-fit test of the observed counts against
	    an even split between the two arms and flags SRM when the p-value is
	    below ``alpha``.

	    Args:
	        tup: Observed visitor counts of the two arms, e.g. ``(a, b)``.
	        alpha: Significance threshold applied to the p-value.  Defaults
	            to 0.01.

	    Returns:
	        A dict with two entries: ``'srm'`` is ``True`` when the p-value is
	        below ``alpha``; ``'diff'`` is the gap between the larger and the
	        smaller arm as a percentage of the expected per-arm count.
	    """
	    total_traffic = sum(tup)
	    if total_traffic == 0:
	        # No traffic yet: the percentage difference would divide by zero
	        # and the chi-square test is undefined, so report "no mismatch".
	        return {"srm": False, 'diff': 0}
	    expected = total_traffic / 2
	    perc_diff = ((max(tup) - min(tup)) / expected) * 100
	    # Index 1 of the chisquare result is the p-value.
	    p_value = chisquare(tup, f_exp=expected)[1]
	    return {"srm": bool(p_value < alpha), 'diff': perc_diff}

	def get_false_positive_rate(list_dictionary):
	    """Return the percentage of results that were flagged as SRM.

	    Args:
	        list_dictionary: List of result dicts, each carrying a boolean
	            ``'srm'`` entry.

	    Returns:
	        The share of entries whose ``'srm'`` flag is set, as a percentage
	        between 0 and 100.  An empty list yields ``0.0`` instead of
	        raising ``ZeroDivisionError``.
	    """
	    if not list_dictionary:
	        return 0.0
	    flagged = sum(1 for item in list_dictionary if item['srm'])
	    return (flagged / len(list_dictionary)) * 100

	def get_false_pos_incremental(samples, increment, checks):
	    """Estimate the false-positive rate of repeatedly checking for SRM.

	    Simulates ``samples`` independent experiments via ``traffic_sim``.
	    Each experiment is tested with ``srm_check`` at every one of its
	    ``checks`` cumulative snapshots, and counts as a false positive when
	    at least one snapshot is flagged.

	    Args:
	        samples: Number of simulated experiments.
	        increment: Visitors added between consecutive checks.
	        checks: Number of checks performed per experiment.

	    Returns:
	        The percentage of experiments flagged at least once, as computed
	        by ``get_false_positive_rate``.
	    """
	    list_dictionary = []
	    for _ in range(samples):
	        outcome = {'srm': False, 'diff': 0}
	        for snapshot in traffic_sim(increment, checks):
	            # Run the test once per snapshot and reuse its result.
	            result = srm_check(snapshot)
	            if result['srm']:
	                # One flagged check already decides this experiment.
	                outcome = result
	                break
	        list_dictionary.append(outcome)
	    return get_false_positive_rate(list_dictionary)

	# Simulate 10,000 experimenters, each checking their site for SRM 1,000 times.
	# Each time they check, the traffic volume has incremented by 100.
	# The result is printed so a script run actually reports the rate.
	if __name__ == "__main__":
	    print(get_false_pos_incremental(samples=10000, increment=100, checks=1000))