Last active
October 30, 2021 15:55
-
-
Save xor2k/2256abe4106cc8ac04a24f42bd36f2c6 to your computer and use it in GitHub Desktop.
t-Test vs. Equivalence Test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from enum import Enum | |
import matplotlib | |
matplotlib.use('agg') | |
import matplotlib.pyplot as plt | |
import matplotlib.ticker as mtick | |
from scipy.stats import norm | |
import pandas as pd | |
import numpy as np | |
import math | |
from math import sqrt, ceil | |
from itertools import repeat | |
from multiprocessing import Pool | |
import importlib | |
import time | |
Delta = 0.01 | |
alpha_eq = 0.05 | |
beta_eq = 0.05 | |
alpha_t = beta_eq | |
beta_t = alpha_eq | |
sigma_hat = 0.15 | |
sigma_zero_chance = sigma_hat / 2 | |
sigma_to_use = [0.05, sigma_zero_chance, 0.08, 0.1, sigma_hat, 0.2] | |
reference_color = 'gold' | |
colors = [ | |
'tab:red', 'tab:purple', 'tab:pink', 'tab:orange', | |
reference_color, 'tab:green' | |
] | |
simsize = 10000 | |
x_steps_sim = 40 | |
x_steps_analytic = 200 | |
# np.linspace might miss -Delta/Delta due to numerical instability otherwise | |
x_step_decimals = 4 | |
force_no_tensorflow = False | |
# compare e.g. equation 3.6 in https://eprints.whiterose.ac.uk/145474/1/ | |
def success_eq_test(D_bar, sigma, n): | |
return np.clip( | |
norm.cdf( (D_bar + Delta) * sqrt(n)/sigma - norm.ppf(1-alpha_eq/2) ) - | |
norm.cdf( (D_bar - Delta) * sqrt(n)/sigma + norm.ppf(1-alpha_eq/2) ), | |
0, 1) | |
def success_t_test(D_bar, sigma, n): | |
return np.clip( | |
norm.cdf( D_bar * sqrt(n)/sigma + norm.ppf(1-alpha_t/2) )- | |
norm.cdf( D_bar * sqrt(n)/sigma - norm.ppf(1-alpha_t/2) ), | |
0, 1) | |
def eq_test(D_bar, sigma_hat, n): | |
return abs(D_bar) <= Delta - norm.ppf(1-alpha_eq/2)*sigma_hat / sqrt(n) | |
def t_test(D_bar, sigma_hat, n): | |
return abs(D_bar) <= norm.ppf(1-alpha_t/2)*sigma_hat / math.sqrt(n) | |
# symmetric in alpha and beta, works for both t-test and equivalence test | |
def calc_n(sigma): | |
return (norm.ppf(1-alpha_eq/2)+norm.ppf(1-beta_eq/2))**2*(sigma**2)/Delta**2 | |
if __name__ == '__main__': | |
use_tensorflow = importlib.util.find_spec("tensorflow") is not None and \ | |
not force_no_tensorflow | |
def more_or_less_stable_linspace(steps): | |
return np.sort(np.unique(np.append(np.round(np.linspace( | |
-0.02, 0.02, steps + 1), decimals=x_step_decimals), | |
np.array([-Delta, 0, Delta] | |
)))) | |
x_sim = more_or_less_stable_linspace(x_steps_sim) | |
x = more_or_less_stable_linspace(x_steps_analytic) | |
sigma_to_use_len = len(sigma_to_use) | |
reference_index = sigma_to_use.index(sigma_hat) | |
t0 = time.time() | |
df_sim = [] | |
class Mode(Enum): | |
T_TEST = 1 | |
EQ_TEST = 2 | |
if use_tensorflow: | |
import os | |
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' | |
# os.environ["CUDA_VISIBLE_DEVICES"] = '-1' | |
import tensorflow as tf | |
tf.random.set_seed(0) | |
for mode in Mode: | |
sr_sim = [] | |
for sigma in sigma_to_use: | |
n = ceil(calc_n(sigma)) | |
sr_sim_ = [] | |
for x_ in x_sim: | |
sim_mean, sim_var = tf.nn.moments(tf.random.normal( | |
(simsize, n), x_, sigma_hat | |
), axes=[1]) | |
sim_std = tf.sqrt(sim_var) | |
sr_sim_ += [ | |
tf.reduce_mean(tf.cast(( | |
eq_test if mode == Mode.EQ_TEST else t_test | |
)( | |
sim_mean, sim_std, n | |
), tf.float32), axis=0).numpy() | |
] | |
sr_sim += [pd.Series( | |
sr_sim_, index=x_sim, name="_hidden" | |
)] | |
df_sim += [pd.concat(sr_sim, axis=1)] | |
else: | |
np.random.seed(0) | |
def worker(args): | |
mode, sigma = args | |
n = ceil(calc_n(sigma)) | |
sr_sim_ = [] | |
for x_ in x_sim: | |
v = np.random.normal(x_, sigma_hat, simsize*n).reshape(( | |
simsize, n | |
)) | |
sim_mean, sim_std = v.mean(axis=1), v.std(axis=1) | |
sr_sim_ += [(eq_test if mode == Mode.EQ_TEST else t_test)( | |
sim_mean, sim_std, n | |
).mean()] | |
return pd.Series( | |
sr_sim_, index=x_sim, name="_hidden" | |
) | |
with Pool() as p: | |
df_sim = [pd.concat(v.get(), axis=1) for v in [p.map_async( | |
worker, zip(repeat(mode), sigma_to_use)) for mode in Mode | |
]] | |
df_sim = pd.concat(df_sim, axis=1, keys=Mode) | |
print(f'simulation duration %.4fs'%(time.time()-t0)) | |
fig, axes = plt.subplots(2, 2) | |
fig.set_figwidth(fig.get_figwidth()*1.5) | |
mid = (fig.subplotpars.right + fig.subplotpars.left)/2 | |
fig.suptitle( | |
f'Test Success Chance: Different Sample Sizes'+ | |
'/Sample Size Calculations (SSC), '+r'$\Delta=d_r=1.0$'+'%.\n'+ | |
' Analytic (Lines) and Normal Distribution Simulation '+ | |
f'(Markers: +, Runs: {simsize})', | |
ha='center' | |
) | |
reference_style = '-' | |
styles = [reference_style]*sigma_to_use_len | |
styles[reference_index] = reference_style | |
transparency = 0.7 | |
color_wrong = 'tab:red' | |
color_right = 'tab:blue' | |
def make_common_axes(ax, yticklabel, y2label, xlabel): | |
ax.xaxis.set_major_formatter(mtick.PercentFormatter(1.0, decimals=1)) | |
ax.set_xlabel(xlabel) | |
if yticklabel: | |
ax.yaxis.set_major_formatter( | |
mtick.PercentFormatter(1.0, decimals=0 | |
)) | |
ax.set_ylabel(f"Success Chance") | |
else: | |
ax.set_yticklabels([]) | |
secundary_yticks=[alpha_eq/2, 1-beta_eq] | |
assert(secundary_yticks == [alpha_t/2, 1-beta_t]) | |
secundary_yaxis = ax.secondary_yaxis('right') | |
secundary_yaxis.set_yticks(secundary_yticks) | |
secundary_yaxis.set_yticklabels([ | |
r'$\alpha_{\mathregular{eq․–test}}/2=$'+'\n'+ | |
r'$\beta_{\mathregular{t–test}}/2=$'+'\n'+ | |
'%.1f%%'%(alpha_eq/2*100)+ | |
'\n', | |
'\n'+ | |
r'$1-\beta_{\mathregular{eq․–test}}=$'+'\n'+ | |
r'$1-\alpha_{\mathregular{t–test}}=$'+'\n'+ | |
'%.0f%%'%((1-beta_eq)*100) | |
] if y2label else []) | |
xlim = (-0.017, 0.017) | |
y_lower_bound = 0 | |
ylim = (-0.03+y_lower_bound, 1.03) | |
ax.set_xlim(xlim) | |
ax.set_ylim(ylim) | |
linewidth = 0.75 | |
grid_linewidth = linewidth | |
thickgrid = { | |
"color": matplotlib.rcParams['grid.color'], | |
"linewidth": grid_linewidth*3, | |
"zorder": -2 | |
} | |
for lim in [-Delta, 0, Delta]: | |
ax.plot([lim, lim], ylim, **thickgrid) | |
for y in secundary_yticks: | |
ax.plot(xlim, (y,y), **thickgrid) | |
ax.grid() | |
ax.set_axisbelow(True) | |
legend = ax.get_legend() | |
if legend is not None: | |
legend.remove() | |
mean_error_mu_text = r"Actual Error $\mu$ " + '\n' | |
# t-test =================================================================== | |
ax_ttest = axes[0,0] | |
lower = None | |
upper = None | |
sr = [] | |
for sigma in sigma_to_use: | |
n = ceil(calc_n(sigma)) | |
y = success_t_test( | |
x, sigma_hat, n | |
) | |
if sigma == sigma_hat / 2: upper = y | |
if sigma == sigma_hat: lower = y | |
sr += [pd.Series( | |
y, index=x, | |
name=r'$\sigma=$'+('% 5.1f%%'%(sigma*100))[1:].replace(' ', ' ')+ | |
r' $\Rightarrow$ $n=$'+('% 5d'%n)[1:].replace(' ', ' ')+ | |
r', $\sigma/\hat{\sigma}=$'+'%.3f'%(sigma/sigma_hat) | |
)] | |
for swap, section in [ | |
[False, x <= -Delta], | |
[True, np.abs(x) <= Delta], | |
[False, x >= Delta] | |
]: | |
ax_ttest.fill_between( | |
x, lower, upper, alpha=transparency/2, where=section, | |
color=color_right if swap else color_wrong, linewidth=0.0 | |
) | |
pd.concat(sr, axis=1).plot( | |
ax=ax_ttest, color=colors, alpha=transparency, style=styles | |
) | |
df_sim[Mode.T_TEST].plot( | |
ax=ax_ttest, linestyle='none', marker='+', | |
alpha=transparency, color=colors, legend=False | |
) | |
make_common_axes( | |
ax_ttest, True, False, mean_error_mu_text + ( | |
r'In the t-Test, $\alpha_{\mathregular{t–test}}$ stays fixed.'+'\n' | |
'(Red, Blue) Areas: additionally (incorrectly, correctly)\n'+ | |
'accepted/rejected'+ | |
r' when not accounting for $\beta_{\mathregular{t–test}}$ in SSC.' | |
)) | |
# equivalence test ========================================================= | |
ax_eqtest = axes[0,1] | |
pd.concat([pd.Series(success_eq_test( | |
x, sigma_hat, ceil(calc_n(sigma)) | |
), index=x) for sigma in sigma_to_use], axis=1).plot( | |
ax=ax_eqtest, color=colors, alpha=transparency, style=styles | |
) | |
df_sim[Mode.EQ_TEST].plot( | |
ax=ax_eqtest, linestyle='none', marker='+', | |
alpha=transparency, color=colors, legend=False | |
) | |
make_common_axes( | |
ax_eqtest, False, True, | |
mean_error_mu_text + | |
r'In the Equivalence Test '+ | |
r'$\alpha_{\mathregular{eq․–test}}$ stays fixed.'+( | |
'\nAnalytic 0% Test Success Chance for '+ | |
r'$\sigma$ ≤ '+('%.1f%%'%(sigma_zero_chance*100)) + '.' if \ | |
alpha_eq == beta_eq else '' | |
)) | |
# reference ================================================================ | |
ax_ref = axes[1,0] | |
df_to_use = pd.Series( | |
success_eq_test(x, sigma_hat, ceil(calc_n(sigma_hat))), index=x | |
) | |
df_to_use.plot( | |
ax=ax_ref, color=reference_color, | |
alpha=transparency, style=reference_style | |
) | |
for swap, section in [ | |
[False, x <= -Delta], | |
[True, np.abs(x) <= Delta], | |
[False, x >= Delta] | |
]: | |
ax_ref.fill_between( | |
x, np.ones_like(x), df_to_use, alpha=transparency, | |
where=section, color=color_wrong if swap else color_right, | |
linewidth=0.0 | |
) | |
ax_ref.fill_between( | |
x, np.zeros_like(x), df_to_use, alpha=transparency, | |
where=section, color=color_right if swap else color_wrong, | |
linewidth=0.0 | |
) | |
make_common_axes( | |
ax_ref, True, True, mean_error_mu_text + | |
'Golden Line: Perfect Sample Size Estimation,\n'+ | |
'(Red, Blue) Areas: (incorrectly, correctly) accepted/rejected.' | |
) | |
# legend =================================================================== | |
legend_axis=axes[1,1] | |
legend_axis.legend( | |
*axes[0,0].get_legend_handles_labels(), | |
title=' Real Standard Deviation '+ | |
r'$\hat{\sigma}=$'+('%.0f%%'%(sigma_hat*100))+',\n'+ | |
'Standard Deviation '+r'$\sigma$'+' used for SSC:', | |
bbox_to_anchor = [1.03, 1.15] | |
) | |
legend_axis.axis('off') | |
fig.subplots_adjust( | |
hspace=0.7, wspace=0.05, left=0.09, right=0.9, top=0.89, bottom=0.05 | |
) | |
# fig.savefig('t_test_vs_equivalence_test.pdf', bbox_inches='tight') | |
fig.savefig('t_test_vs_equivalence_test.png', dpi=300, bbox_inches='tight') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment