Skip to content

Instantly share code, notes, and snippets.

@tade0726
Last active March 15, 2021 09:38
Show Gist options
  • Save tade0726/c1a263ae29fbae8f19d0dfa6e00d4e53 to your computer and use it in GitHub Desktop.
Save tade0726/c1a263ae29fbae8f19d0dfa6e00d4e53 to your computer and use it in GitHub Desktop.
simple script for significant and power report
import numpy as np
import scipy.stats
from statsmodels.stats.proportion import proportions_ztest
def get_p1_p2_alternative(s, t):
"""
return alternative base on actual p1, p2
"""
s = np.array(s)
t = np.array(t)
p = s / t
return "smaller" if p[0] < p[1] else "larger"
def z_test(s, t, alpha=0.05, alternative='two-sided'):
stat, p = proportions_ztest(s, t, alternative=alternative)
return stat, p, p <= alpha
def get_power(s, t, alpha, alternative="two-sided"):
s = np.array(s)
t = np.array(t)
p1, p2 = tuple(s / t)
n = t.sum()
alpha = alpha
believe = alpha / 2 if alternative == "two-sided" else alpha
qu = scipy.stats.norm.ppf(1 - believe)
diff = abs(p2 - p1)
bp = (p1 + p2) / 2
v1 = p1 * (1 - p1)
v2 = p2 * (1 - p2)
bv = bp * (1 - bp)
power_part_one = scipy.stats.norm.cdf((n ** 0.5 * diff - qu * (2 * bv) ** 0.5) / (v1 + v2) ** 0.5)
power_part_two = 1 - scipy.stats.norm.cdf((n ** 0.5 * diff + qu * (2 * bv) ** 0.5) / (v1 + v2) ** 0.5)
power = power_part_one + power_part_two
if alternative == "two-sided":
return power
elif alternative in ("larger", "smaller"):
return max(power_part_one, power_part_two)
else:
raise ValueError()
def full_report(s, t, alpha, is_two_sided=True):
"""I assume the alternative is the same direction as the actual p1, p2 values.
"""
alternative = get_p1_p2_alternative(s, t) if not is_two_sided else "two-sided"
z_stats, p, sign = z_test(s, t, alpha=alpha, alternative=alternative)
power = get_power(s, t, alpha=alpha, alternative=alternative)
s = np.array(s)
t = np.array(t)
means = s / t
p1 = means[0]
p2 = means[1]
change = (p2 / p1) - 1
return {
"alternative": alternative,
"change": change,
"p1": p1,
"p2": p2,
"p-value": p,
"significant": sign,
"z_stats": z_stats,
"power": power,
}
if __name__ == "__main__":
# first column should be control (version A)
# sec column should be treatment (version B)
test_data = [
(6500, 6694), # total
(145, 120) # conversion
]
results = full_report(test_data[1], test_data[0], 0.05)
print(results)
"""
{'alternative': 'two-sided', 'change': -0.19639821559193515, 'p1': 0.022307692307692306, 'p2': 0.0179265013444876, 'p-value': 0.0729107128409782, 'significant': False, 'z_stats': 1.7933892027166773, 'power': 0.717231252963892}
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment