# tomron/welchtest.py

## welchtest.py
import pandas as pd
import numpy as np
from scipy import stats


# Path of the CSV holding one row per impression; the script reads the
# 'advertisement_id' and 'action' columns from it.
input_file = 'advertisement_clicks.csv'


def two_sample_t_test(a, b):
    """Run a two-sided, two-sample Student's t-test with pooled variance.

    This is the same test that ``scipy.stats.ttest_ind(a, b)`` performs with
    its default ``equal_var=True``, written out by hand so the two results
    can be compared.  The samples may have different sizes.

    NOTE(review): the file is named "welchtest", but both the hand-written
    formula and the default ``ttest_ind`` call are the pooled-variance
    (Student) test, not Welch's unequal-variance test -- confirm which one is
    actually wanted.

    Args:
        a: Sequence of numeric observations for the first group.
        b: Sequence of numeric observations for the second group.

    Returns:
        A ``(t, p)`` tuple: the t-statistic and the two-sided p-value.

    Raises:
        ValueError: If either sample has fewer than two observations, since
            the unbiased sample variance is undefined in that case.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    n1, n2 = a.size, b.size
    if n1 < 2 or n2 < 2:
        raise ValueError(
            "each sample needs at least 2 observations "
            "(got %d and %d)" % (n1, n2)
        )

    # Unbiased (ddof=1) sample variances.  For 0/1 data the mean is the
    # click rate p and this equals p * (1 - p) * n / (n - 1).
    var1 = a.var(ddof=1)
    var2 = b.var(ddof=1)

    # Pool the variances weighted by each group's own degrees of freedom, so
    # the result stays correct when the groups differ in size.
    dof = n1 + n2 - 2
    pooled_var = ((n1 - 1) * var1 + (n2 - 1) * var2) / dof
    std_err = np.sqrt(pooled_var * (1.0 / n1 + 1.0 / n2))

    t = (a.mean() - b.mean()) / std_err
    # sf(x) is the upper-tail probability 1 - cdf(x), evaluated without the
    # cancellation error of the explicit subtraction; doubling it gives the
    # two-sided p-value.
    p = 2 * stats.t.sf(np.abs(t), df=dof)
    return t, p


def main():
    """Load the click data and print the manual and built-in t-test results."""
    clicks = pd.read_csv(input_file)

    # Compare ids case-insensitively (and ignoring surrounding whitespace) so
    # that 'b' and 'B' select the same group.
    ad_id = clicks['advertisement_id'].astype(str).str.strip().str.upper()
    a = clicks.loc[ad_id == 'A', 'action'].tolist()
    b = clicks.loc[ad_id == 'B', 'action'].tolist()

    t, p = two_sample_t_test(a, b)
    print("t:\t", t, "p:\t", p)  # two-sided test p-value

    # built-in t-test:
    t2, p2 = stats.ttest_ind(a, b)
    print("t2:\t", t2, "p2:\t", p2)


if __name__ == "__main__":
    main()