# outman/ci.py

## ci.py
import numpy as np
import scipy.stats as st
import random

# Simulated A/B test: compare the mean number of sign-ups per user between
# version A and version B with a two-sided independent two-sample t-test,
# then report a 95% confidence interval for the difference of the means and
# Cohen's d as the effect size.

# Number of simulated users in each group.
sample_size = 20000

# Sign-up counts per user. random.randint includes both endpoints, so group A
# is drawn from 0..10 and group B from 2..7.
apply_a = [random.randint(0, 10) for _ in range(sample_size)]
apply_b = [random.randint(2, 7) for _ in range(sample_size)]

# Group sizes are taken from the data so the formulas below never rely on a
# repeated literal.
n_a = len(apply_a)
n_b = len(apply_b)

# Group means.
mean_a = np.mean(apply_a)
mean_b = np.mean(apply_b)

print('mean_a={},mean_b={}'.format(mean_a, mean_b))

# Sample standard deviations. ddof=1 selects the n-1 denominator, which is
# what the standard-error and pooled-SD formulas below (weighted by n-1)
# expect; NumPy's default ddof=0 would give the population SD instead.
std_a = np.std(apply_a, ddof=1)
std_b = np.std(apply_b, ddof=1)

print('std_a={},std_b={}'.format(std_a, std_b))

# (Inspection of the data distribution is omitted.)

# Test direction: the alternative hypothesis is that A and B differ, i.e.
# mean(A) != mean(B), so a two-tailed test is used.

# t statistic and p-value computed under the null hypothesis.
t, p_value = st.ttest_ind(apply_a, apply_b)
print('t={},p-value={}'.format(t, p_value))

# Decision rule at significance level alpha = 0.05. ttest_ind already returns
# a two-sided p-value by default, so it is compared with alpha itself;
# comparing with alpha / 2 would silently test at the 2.5% level.
alpha = 0.05
if p_value < alpha:
    print('拒绝零假设，有统计显著，A版本和B版本有差异')
else:
    print('接受零假设，没有统计显著，A版本和B版本没有差异')

# Degrees of freedom of the two-sample t-test.
df = n_a + n_b - 2

# Two-sided critical t value for confidence level 1 - alpha (95%), computed
# from df rather than read from a table (about 1.960 for this df).
t_ci = st.t.ppf(1 - alpha / 2, df)

# Standard error of the difference between the two group means.
se = np.sqrt(np.square(std_a) / n_a + np.square(std_b) / n_b)

# For two independent samples, the point estimate at the centre of the
# confidence interval is mean(A) - mean(B).
mean = mean_a - mean_b
x = mean - t_ci * se
y = mean + t_ci * se

print('置信区间 ({}, {})'.format(x, y))

# Effect size (Cohen's d): difference of means divided by the pooled
# standard deviation.
sp = np.sqrt(((n_a - 1) * np.square(std_a) + (n_b - 1) * np.square(std_b)) / df)
print('效应量 d = {}'.format((mean_a - mean_b) / sp))
	import numpy as np
	import scipy.stats as st
	import random

	# Simulated A/B test: compare the mean number of sign-ups per user between
	# version A and version B with a two-sided independent two-sample t-test,
	# then report a 95% confidence interval for the difference of the means and
	# Cohen's d as the effect size.

	# Number of simulated users in each group.
	sample_size = 20000

	# Sign-up counts per user. random.randint includes both endpoints, so group A
	# is drawn from 0..10 and group B from 2..7.
	apply_a = []
	apply_b = []
	for _ in range(sample_size):
	    apply_a.append(random.randint(0, 10))
	    apply_b.append(random.randint(2, 7))

	# Group sizes are taken from the data so the formulas below never rely on a
	# repeated literal.
	n_a = len(apply_a)
	n_b = len(apply_b)

	# Group means.
	mean_a = np.mean(apply_a)
	mean_b = np.mean(apply_b)

	print('mean_a={},mean_b={}'.format(mean_a, mean_b))

	# Sample standard deviations. ddof=1 selects the n-1 denominator, which is
	# what the standard-error and pooled-SD formulas below (weighted by n-1)
	# expect; NumPy's default ddof=0 would give the population SD instead.
	std_a = np.std(apply_a, ddof=1)
	std_b = np.std(apply_b, ddof=1)

	print('std_a={},std_b={}'.format(std_a, std_b))

	# (Inspection of the data distribution is omitted.)

	# Test direction: the alternative hypothesis is that A and B differ, i.e.
	# mean(A) != mean(B), so a two-tailed test is used.

	# t statistic and p-value computed under the null hypothesis.
	t, p_value = st.ttest_ind(apply_a, apply_b)
	print('t={},p-value={}'.format(t, p_value))

	# Decision rule at significance level alpha = 0.05. ttest_ind already returns
	# a two-sided p-value by default, so it is compared with alpha itself;
	# comparing with alpha / 2 would silently test at the 2.5% level.
	alpha = 0.05
	if p_value < alpha:
	    print('拒绝零假设，有统计显著，A版本和B版本有差异')
	else:
	    print('接受零假设，没有统计显著，A版本和B版本没有差异')

	# Degrees of freedom of the two-sample t-test.
	df = n_a + n_b - 2

	# Two-sided critical t value for confidence level 1 - alpha (95%), computed
	# from df rather than read from a table (about 1.960 for this df).
	t_ci = st.t.ppf(1 - alpha / 2, df)

	# Standard error of the difference between the two group means.
	se = np.sqrt(np.square(std_a) / n_a + np.square(std_b) / n_b)

	# For two independent samples, the point estimate at the centre of the
	# confidence interval is mean(A) - mean(B).
	mean = mean_a - mean_b
	x = mean - t_ci * se
	y = mean + t_ci * se

	print('置信区间 ({}, {})'.format(x, y))

	# Effect size (Cohen's d): difference of means divided by the pooled
	# standard deviation.
	sp = np.sqrt(((n_a - 1) * np.square(std_a) + (n_b - 1) * np.square(std_b)) / df)
	print('效应量 d = {}'.format((mean_a - mean_b) / sp))