Skip to content

Instantly share code, notes, and snippets.

@xor2k
Last active October 30, 2021 15:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xor2k/2256abe4106cc8ac04a24f42bd36f2c6 to your computer and use it in GitHub Desktop.
Save xor2k/2256abe4106cc8ac04a24f42bd36f2c6 to your computer and use it in GitHub Desktop.
t-Test vs. Equivalence Test
from enum import Enum
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
from scipy.stats import norm
import pandas as pd
import numpy as np
import math
from math import sqrt, ceil
from itertools import repeat
from multiprocessing import Pool
import importlib
import time
Delta = 0.01
alpha_eq = 0.05
beta_eq = 0.05
alpha_t = beta_eq
beta_t = alpha_eq
sigma_hat = 0.15
sigma_zero_chance = sigma_hat / 2
sigma_to_use = [0.05, sigma_zero_chance, 0.08, 0.1, sigma_hat, 0.2]
reference_color = 'gold'
colors = [
'tab:red', 'tab:purple', 'tab:pink', 'tab:orange',
reference_color, 'tab:green'
]
simsize = 10000
x_steps_sim = 40
x_steps_analytic = 200
# np.linspace might miss -Delta/Delta due to numerical instability otherwise
x_step_decimals = 4
force_no_tensorflow = False
# compare e.g. equation 3.6 in https://eprints.whiterose.ac.uk/145474/1/
def success_eq_test(D_bar, sigma, n):
return np.clip(
norm.cdf( (D_bar + Delta) * sqrt(n)/sigma - norm.ppf(1-alpha_eq/2) ) -
norm.cdf( (D_bar - Delta) * sqrt(n)/sigma + norm.ppf(1-alpha_eq/2) ),
0, 1)
def success_t_test(D_bar, sigma, n):
return np.clip(
norm.cdf( D_bar * sqrt(n)/sigma + norm.ppf(1-alpha_t/2) )-
norm.cdf( D_bar * sqrt(n)/sigma - norm.ppf(1-alpha_t/2) ),
0, 1)
def eq_test(D_bar, sigma_hat, n):
return abs(D_bar) <= Delta - norm.ppf(1-alpha_eq/2)*sigma_hat / sqrt(n)
def t_test(D_bar, sigma_hat, n):
return abs(D_bar) <= norm.ppf(1-alpha_t/2)*sigma_hat / math.sqrt(n)
# symmetric in alpha and beta, works for both t-test and equivalence test
def calc_n(sigma):
return (norm.ppf(1-alpha_eq/2)+norm.ppf(1-beta_eq/2))**2*(sigma**2)/Delta**2
if __name__ == '__main__':
use_tensorflow = importlib.util.find_spec("tensorflow") is not None and \
not force_no_tensorflow
def more_or_less_stable_linspace(steps):
return np.sort(np.unique(np.append(np.round(np.linspace(
-0.02, 0.02, steps + 1), decimals=x_step_decimals),
np.array([-Delta, 0, Delta]
))))
x_sim = more_or_less_stable_linspace(x_steps_sim)
x = more_or_less_stable_linspace(x_steps_analytic)
sigma_to_use_len = len(sigma_to_use)
reference_index = sigma_to_use.index(sigma_hat)
t0 = time.time()
df_sim = []
class Mode(Enum):
T_TEST = 1
EQ_TEST = 2
if use_tensorflow:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
import tensorflow as tf
tf.random.set_seed(0)
for mode in Mode:
sr_sim = []
for sigma in sigma_to_use:
n = ceil(calc_n(sigma))
sr_sim_ = []
for x_ in x_sim:
sim_mean, sim_var = tf.nn.moments(tf.random.normal(
(simsize, n), x_, sigma_hat
), axes=[1])
sim_std = tf.sqrt(sim_var)
sr_sim_ += [
tf.reduce_mean(tf.cast((
eq_test if mode == Mode.EQ_TEST else t_test
)(
sim_mean, sim_std, n
), tf.float32), axis=0).numpy()
]
sr_sim += [pd.Series(
sr_sim_, index=x_sim, name="_hidden"
)]
df_sim += [pd.concat(sr_sim, axis=1)]
else:
np.random.seed(0)
def worker(args):
mode, sigma = args
n = ceil(calc_n(sigma))
sr_sim_ = []
for x_ in x_sim:
v = np.random.normal(x_, sigma_hat, simsize*n).reshape((
simsize, n
))
sim_mean, sim_std = v.mean(axis=1), v.std(axis=1)
sr_sim_ += [(eq_test if mode == Mode.EQ_TEST else t_test)(
sim_mean, sim_std, n
).mean()]
return pd.Series(
sr_sim_, index=x_sim, name="_hidden"
)
with Pool() as p:
df_sim = [pd.concat(v.get(), axis=1) for v in [p.map_async(
worker, zip(repeat(mode), sigma_to_use)) for mode in Mode
]]
df_sim = pd.concat(df_sim, axis=1, keys=Mode)
print(f'simulation duration %.4fs'%(time.time()-t0))
fig, axes = plt.subplots(2, 2)
fig.set_figwidth(fig.get_figwidth()*1.5)
mid = (fig.subplotpars.right + fig.subplotpars.left)/2
fig.suptitle(
f'Test Success Chance: Different Sample Sizes'+
'/Sample Size Calculations (SSC), '+r'$\Delta=d_r=1.0$'+'%.\n'+
' Analytic (Lines) and Normal Distribution Simulation '+
f'(Markers: +, Runs: {simsize})',
ha='center'
)
reference_style = '-'
styles = [reference_style]*sigma_to_use_len
styles[reference_index] = reference_style
transparency = 0.7
color_wrong = 'tab:red'
color_right = 'tab:blue'
def make_common_axes(ax, yticklabel, y2label, xlabel):
ax.xaxis.set_major_formatter(mtick.PercentFormatter(1.0, decimals=1))
ax.set_xlabel(xlabel)
if yticklabel:
ax.yaxis.set_major_formatter(
mtick.PercentFormatter(1.0, decimals=0
))
ax.set_ylabel(f"Success Chance")
else:
ax.set_yticklabels([])
secundary_yticks=[alpha_eq/2, 1-beta_eq]
assert(secundary_yticks == [alpha_t/2, 1-beta_t])
secundary_yaxis = ax.secondary_yaxis('right')
secundary_yaxis.set_yticks(secundary_yticks)
secundary_yaxis.set_yticklabels([
r'$\alpha_{\mathregular{eq․–test}}/2=$'+'\n'+
r'$\beta_{\mathregular{t–test}}/2=$'+'\n'+
'%.1f%%'%(alpha_eq/2*100)+
'\n',
'\n'+
r'$1-\beta_{\mathregular{eq․–test}}=$'+'\n'+
r'$1-\alpha_{\mathregular{t–test}}=$'+'\n'+
'%.0f%%'%((1-beta_eq)*100)
] if y2label else [])
xlim = (-0.017, 0.017)
y_lower_bound = 0
ylim = (-0.03+y_lower_bound, 1.03)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
linewidth = 0.75
grid_linewidth = linewidth
thickgrid = {
"color": matplotlib.rcParams['grid.color'],
"linewidth": grid_linewidth*3,
"zorder": -2
}
for lim in [-Delta, 0, Delta]:
ax.plot([lim, lim], ylim, **thickgrid)
for y in secundary_yticks:
ax.plot(xlim, (y,y), **thickgrid)
ax.grid()
ax.set_axisbelow(True)
legend = ax.get_legend()
if legend is not None:
legend.remove()
mean_error_mu_text = r"Actual Error $\mu$ " + '\n'
# t-test ===================================================================
ax_ttest = axes[0,0]
lower = None
upper = None
sr = []
for sigma in sigma_to_use:
n = ceil(calc_n(sigma))
y = success_t_test(
x, sigma_hat, n
)
if sigma == sigma_hat / 2: upper = y
if sigma == sigma_hat: lower = y
sr += [pd.Series(
y, index=x,
name=r'$\sigma=$'+('% 5.1f%%'%(sigma*100))[1:].replace(' ', ' ')+
r' $\Rightarrow$ $n=$'+('% 5d'%n)[1:].replace(' ', ' ')+
r', $\sigma/\hat{\sigma}=$'+'%.3f'%(sigma/sigma_hat)
)]
for swap, section in [
[False, x <= -Delta],
[True, np.abs(x) <= Delta],
[False, x >= Delta]
]:
ax_ttest.fill_between(
x, lower, upper, alpha=transparency/2, where=section,
color=color_right if swap else color_wrong, linewidth=0.0
)
pd.concat(sr, axis=1).plot(
ax=ax_ttest, color=colors, alpha=transparency, style=styles
)
df_sim[Mode.T_TEST].plot(
ax=ax_ttest, linestyle='none', marker='+',
alpha=transparency, color=colors, legend=False
)
make_common_axes(
ax_ttest, True, False, mean_error_mu_text + (
r'In the t-Test, $\alpha_{\mathregular{t–test}}$ stays fixed.'+'\n'
'(Red, Blue) Areas: additionally (incorrectly, correctly)\n'+
'accepted/rejected'+
r' when not accounting for $\beta_{\mathregular{t–test}}$ in SSC.'
))
# equivalence test =========================================================
ax_eqtest = axes[0,1]
pd.concat([pd.Series(success_eq_test(
x, sigma_hat, ceil(calc_n(sigma))
), index=x) for sigma in sigma_to_use], axis=1).plot(
ax=ax_eqtest, color=colors, alpha=transparency, style=styles
)
df_sim[Mode.EQ_TEST].plot(
ax=ax_eqtest, linestyle='none', marker='+',
alpha=transparency, color=colors, legend=False
)
make_common_axes(
ax_eqtest, False, True,
mean_error_mu_text +
r'In the Equivalence Test '+
r'$\alpha_{\mathregular{eq․–test}}$ stays fixed.'+(
'\nAnalytic 0% Test Success Chance for '+
r'$\sigma$ ≤ '+('%.1f%%'%(sigma_zero_chance*100)) + '.' if \
alpha_eq == beta_eq else ''
))
# reference ================================================================
ax_ref = axes[1,0]
df_to_use = pd.Series(
success_eq_test(x, sigma_hat, ceil(calc_n(sigma_hat))), index=x
)
df_to_use.plot(
ax=ax_ref, color=reference_color,
alpha=transparency, style=reference_style
)
for swap, section in [
[False, x <= -Delta],
[True, np.abs(x) <= Delta],
[False, x >= Delta]
]:
ax_ref.fill_between(
x, np.ones_like(x), df_to_use, alpha=transparency,
where=section, color=color_wrong if swap else color_right,
linewidth=0.0
)
ax_ref.fill_between(
x, np.zeros_like(x), df_to_use, alpha=transparency,
where=section, color=color_right if swap else color_wrong,
linewidth=0.0
)
make_common_axes(
ax_ref, True, True, mean_error_mu_text +
'Golden Line: Perfect Sample Size Estimation,\n'+
'(Red, Blue) Areas: (incorrectly, correctly) accepted/rejected.'
)
# legend ===================================================================
legend_axis=axes[1,1]
legend_axis.legend(
*axes[0,0].get_legend_handles_labels(),
title=' Real Standard Deviation '+
r'$\hat{\sigma}=$'+('%.0f%%'%(sigma_hat*100))+',\n'+
'Standard Deviation '+r'$\sigma$'+' used for SSC:',
bbox_to_anchor = [1.03, 1.15]
)
legend_axis.axis('off')
fig.subplots_adjust(
hspace=0.7, wspace=0.05, left=0.09, right=0.9, top=0.89, bottom=0.05
)
# fig.savefig('t_test_vs_equivalence_test.pdf', bbox_inches='tight')
fig.savefig('t_test_vs_equivalence_test.png', dpi=300, bbox_inches='tight')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment