xor2k/t_test_vs_equivalence_test.py

## t_test_vs_equivalence_test.py
from enum import Enum
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
from scipy.stats import norm
import pandas as pd
import numpy as np
import math
from math import sqrt, ceil
from itertools import repeat
from multiprocessing import Pool
import importlib
import time

Delta = 0.01
alpha_eq = 0.05
beta_eq = 0.05
alpha_t = beta_eq
beta_t = alpha_eq
sigma_hat = 0.15
sigma_zero_chance = sigma_hat / 2
sigma_to_use = [0.05, sigma_zero_chance, 0.08, 0.1, sigma_hat, 0.2]
reference_color = 'gold'
colors = [
    'tab:red', 'tab:purple', 'tab:pink', 'tab:orange',
    reference_color, 'tab:green'
]

simsize = 10000
x_steps_sim = 40
x_steps_analytic = 200
# np.linspace might miss -Delta/Delta due to numerical instability otherwise
x_step_decimals = 4

force_no_tensorflow = False

# compare e.g. equation 3.6 in https://eprints.whiterose.ac.uk/145474/1/
def success_eq_test(D_bar, sigma, n):
    return np.clip(
        norm.cdf( (D_bar + Delta) * sqrt(n)/sigma - norm.ppf(1-alpha_eq/2) ) -
        norm.cdf( (D_bar - Delta) * sqrt(n)/sigma + norm.ppf(1-alpha_eq/2) ),
    0, 1)

def success_t_test(D_bar, sigma, n):
    return np.clip(
        norm.cdf( D_bar * sqrt(n)/sigma + norm.ppf(1-alpha_t/2) )-
        norm.cdf( D_bar * sqrt(n)/sigma - norm.ppf(1-alpha_t/2) ),
    0, 1)

def eq_test(D_bar, sigma_hat, n):
    return abs(D_bar) <= Delta - norm.ppf(1-alpha_eq/2)*sigma_hat / sqrt(n)

def t_test(D_bar, sigma_hat, n):
    return abs(D_bar) <= norm.ppf(1-alpha_t/2)*sigma_hat / math.sqrt(n)

# symmetric in alpha and beta, works for both t-test and equivalence test
def calc_n(sigma):
    return (norm.ppf(1-alpha_eq/2)+norm.ppf(1-beta_eq/2))**2*(sigma**2)/Delta**2

if __name__ == '__main__':

    use_tensorflow = importlib.util.find_spec("tensorflow") is not None and \
        not force_no_tensorflow

    def more_or_less_stable_linspace(steps):
        return np.sort(np.unique(np.append(np.round(np.linspace(
            -0.02, 0.02, steps + 1), decimals=x_step_decimals),
            np.array([-Delta, 0, Delta]
        ))))

    x_sim = more_or_less_stable_linspace(x_steps_sim)
    x = more_or_less_stable_linspace(x_steps_analytic)

    sigma_to_use_len = len(sigma_to_use)
    reference_index = sigma_to_use.index(sigma_hat)

    t0 = time.time()

    df_sim = []

    class Mode(Enum):
        T_TEST = 1
        EQ_TEST = 2

    if use_tensorflow:
        import os
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
        # os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
        import tensorflow as tf

        tf.random.set_seed(0)

        for mode in Mode:
            sr_sim = []
            for sigma in sigma_to_use:
                n = ceil(calc_n(sigma))
                sr_sim_ = []
                for x_ in x_sim:
                    sim_mean, sim_var = tf.nn.moments(tf.random.normal(
                        (simsize, n), x_, sigma_hat
                    ), axes=[1])
                    sim_std = tf.sqrt(sim_var)

                    sr_sim_ += [
                        tf.reduce_mean(tf.cast((
                            eq_test if mode == Mode.EQ_TEST else t_test
                        )(
                            sim_mean, sim_std, n
                        ), tf.float32), axis=0).numpy()
                    ]

                sr_sim += [pd.Series(
                    sr_sim_, index=x_sim, name="_hidden"
                )]

            df_sim += [pd.concat(sr_sim, axis=1)]
    else:
        np.random.seed(0)

        def worker(args):
            mode, sigma = args
            n = ceil(calc_n(sigma))
            sr_sim_ = []
            for x_ in x_sim:
                v = np.random.normal(x_, sigma_hat, simsize*n).reshape((
                    simsize, n
                ))
                sim_mean, sim_std = v.mean(axis=1), v.std(axis=1)

                sr_sim_ += [(eq_test if mode == Mode.EQ_TEST else t_test)(
                    sim_mean, sim_std, n
                ).mean()]

            return pd.Series(
                sr_sim_, index=x_sim, name="_hidden"
            )

        with Pool() as p:
            df_sim = [pd.concat(v.get(), axis=1) for v in [p.map_async(
                worker, zip(repeat(mode), sigma_to_use)) for mode in Mode
            ]]

    df_sim = pd.concat(df_sim, axis=1, keys=Mode)

    print(f'simulation duration %.4fs'%(time.time()-t0))

    fig, axes = plt.subplots(2, 2)

    fig.set_figwidth(fig.get_figwidth()*1.5)

    mid = (fig.subplotpars.right + fig.subplotpars.left)/2
    fig.suptitle(
        f'Test Success Chance: Different Sample Sizes'+
        '/Sample Size Calculations (SSC), '+r'$\Delta=d_r=1.0$'+'%.\n'+
        ' Analytic (Lines) and Normal Distribution Simulation '+
        f'(Markers: +, Runs: {simsize})',
        ha='center'
    )

    reference_style = '-'

    styles = [reference_style]*sigma_to_use_len
    styles[reference_index] = reference_style

    transparency = 0.7

    color_wrong = 'tab:red'
    color_right = 'tab:blue'

    def make_common_axes(ax, yticklabel, y2label, xlabel):
        ax.xaxis.set_major_formatter(mtick.PercentFormatter(1.0, decimals=1))
        ax.set_xlabel(xlabel)

        if yticklabel:
            ax.yaxis.set_major_formatter(
                mtick.PercentFormatter(1.0, decimals=0
            ))
            ax.set_ylabel(f"Success Chance")
        else:
            ax.set_yticklabels([])

        secundary_yticks=[alpha_eq/2, 1-beta_eq]
        assert(secundary_yticks == [alpha_t/2, 1-beta_t])

        secundary_yaxis = ax.secondary_yaxis('right')
        secundary_yaxis.set_yticks(secundary_yticks)

        secundary_yaxis.set_yticklabels([
            r'$\alpha_{\mathregular{eq․–test}}/2=$'+'\n'+
            r'$\beta_{\mathregular{t–test}}/2=$'+'\n'+
            '%.1f%%'%(alpha_eq/2*100)+
            '\n',

            '\n'+
            r'$1-\beta_{\mathregular{eq․–test}}=$'+'\n'+
            r'$1-\alpha_{\mathregular{t–test}}=$'+'\n'+
            '%.0f%%'%((1-beta_eq)*100)
        ] if y2label else [])

        xlim = (-0.017, 0.017)

        y_lower_bound = 0
        ylim = (-0.03+y_lower_bound, 1.03)

        ax.set_xlim(xlim)
        ax.set_ylim(ylim)

        linewidth = 0.75
        grid_linewidth = linewidth

        thickgrid = {
            "color": matplotlib.rcParams['grid.color'],
            "linewidth": grid_linewidth*3,
            "zorder": -2
        }

        for lim in [-Delta, 0, Delta]:
            ax.plot([lim, lim], ylim, **thickgrid)

        for y in secundary_yticks:
            ax.plot(xlim, (y,y), **thickgrid)

        ax.grid()
        ax.set_axisbelow(True)
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

    mean_error_mu_text = r"Actual Error $\mu$ " + '\n'

    # t-test ===================================================================
    ax_ttest = axes[0,0]

    lower = None
    upper = None

    sr = []

    for sigma in sigma_to_use:
        n = ceil(calc_n(sigma))
        y = success_t_test(
            x, sigma_hat, n
        )
        if sigma == sigma_hat / 2: upper = y
        if sigma == sigma_hat: lower = y
        sr += [pd.Series(
            y, index=x,
            name=r'$\sigma=$'+('% 5.1f%%'%(sigma*100))[1:].replace(' ', ' ')+
            r' $\Rightarrow$ $n=$'+('% 5d'%n)[1:].replace(' ', ' ')+
            r', $\sigma/\hat{\sigma}=$'+'%.3f'%(sigma/sigma_hat)
        )]

    for swap, section in [
        [False, x <= -Delta],
        [True, np.abs(x) <= Delta],
        [False, x >= Delta]
    ]:
        ax_ttest.fill_between(
            x, lower, upper, alpha=transparency/2, where=section,
            color=color_right if swap else color_wrong, linewidth=0.0
        )

    pd.concat(sr, axis=1).plot(
        ax=ax_ttest, color=colors, alpha=transparency, style=styles
    )
    df_sim[Mode.T_TEST].plot(
        ax=ax_ttest, linestyle='none', marker='+',
        alpha=transparency, color=colors, legend=False
    )

    make_common_axes(
        ax_ttest, True, False, mean_error_mu_text + (
        r'In the t-Test, $\alpha_{\mathregular{t–test}}$ stays fixed.'+'\n'
        '(Red, Blue) Areas: additionally (incorrectly, correctly)\n'+
        'accepted/rejected'+
        r' when not accounting for $\beta_{\mathregular{t–test}}$ in SSC.'
    ))

    # equivalence test =========================================================
    ax_eqtest = axes[0,1]

    pd.concat([pd.Series(success_eq_test(
            x, sigma_hat, ceil(calc_n(sigma))
        ), index=x) for sigma in sigma_to_use], axis=1).plot(
        ax=ax_eqtest, color=colors, alpha=transparency, style=styles
    )
    df_sim[Mode.EQ_TEST].plot(
        ax=ax_eqtest, linestyle='none', marker='+',
        alpha=transparency, color=colors, legend=False
    )

    make_common_axes(
        ax_eqtest, False, True,
        mean_error_mu_text +
        r'In the Equivalence Test '+
        r'$\alpha_{\mathregular{eq․–test}}$ stays fixed.'+(
        '\nAnalytic 0% Test Success Chance for '+
        r'$\sigma$ ≤ '+('%.1f%%'%(sigma_zero_chance*100)) + '.' if \
        alpha_eq == beta_eq else ''
    ))

    # reference ================================================================
    ax_ref = axes[1,0]

    df_to_use = pd.Series(
        success_eq_test(x, sigma_hat, ceil(calc_n(sigma_hat))), index=x
    )
    df_to_use.plot(
        ax=ax_ref, color=reference_color,
        alpha=transparency, style=reference_style
    )
    for swap, section in [
        [False, x <= -Delta],
        [True, np.abs(x) <= Delta],
        [False, x >= Delta]
    ]:
        ax_ref.fill_between(
            x, np.ones_like(x), df_to_use, alpha=transparency,
            where=section, color=color_wrong if swap else color_right,
            linewidth=0.0
        )
        ax_ref.fill_between(
            x, np.zeros_like(x), df_to_use, alpha=transparency,
            where=section, color=color_right if swap else color_wrong,
            linewidth=0.0
        )

    make_common_axes(
        ax_ref, True, True, mean_error_mu_text +
        'Golden Line: Perfect Sample Size Estimation,\n'+
        '(Red, Blue) Areas: (incorrectly, correctly) accepted/rejected.'
    )

    # legend ===================================================================
    legend_axis=axes[1,1]
    legend_axis.legend(
        *axes[0,0].get_legend_handles_labels(),
        title=' Real Standard Deviation '+
        r'$\hat{\sigma}=$'+('%.0f%%'%(sigma_hat*100))+',\n'+
        'Standard Deviation '+r'$\sigma$'+' used for SSC:',
        bbox_to_anchor = [1.03, 1.15]
    )
    legend_axis.axis('off')

    fig.subplots_adjust(
        hspace=0.7, wspace=0.05, left=0.09, right=0.9, top=0.89, bottom=0.05
    )
    # fig.savefig('t_test_vs_equivalence_test.pdf', bbox_inches='tight')
    fig.savefig('t_test_vs_equivalence_test.png', dpi=300, bbox_inches='tight')
	from enum import Enum
	import matplotlib
	matplotlib.use('agg')
	import matplotlib.pyplot as plt
	import matplotlib.ticker as mtick
	from scipy.stats import norm
	import pandas as pd
	import numpy as np
	import math
	from math import sqrt, ceil
	from itertools import repeat
	from multiprocessing import Pool
	import importlib
	import time

	Delta = 0.01
	alpha_eq = 0.05
	beta_eq = 0.05
	alpha_t = beta_eq
	beta_t = alpha_eq
	sigma_hat = 0.15
	sigma_zero_chance = sigma_hat / 2
	sigma_to_use = [0.05, sigma_zero_chance, 0.08, 0.1, sigma_hat, 0.2]
	reference_color = 'gold'
	colors = [
	'tab:red', 'tab:purple', 'tab:pink', 'tab:orange',
	reference_color, 'tab:green'
	]

	simsize = 10000
	x_steps_sim = 40
	x_steps_analytic = 200
	# np.linspace might miss -Delta/Delta due to numerical instability otherwise
	x_step_decimals = 4

	force_no_tensorflow = False

	# compare e.g. equation 3.6 in https://eprints.whiterose.ac.uk/145474/1/
	def success_eq_test(D_bar, sigma, n):
	return np.clip(
	norm.cdf( (D_bar + Delta) * sqrt(n)/sigma - norm.ppf(1-alpha_eq/2) ) -
	norm.cdf( (D_bar - Delta) * sqrt(n)/sigma + norm.ppf(1-alpha_eq/2) ),
	0, 1)

	def success_t_test(D_bar, sigma, n):
	return np.clip(
	norm.cdf( D_bar * sqrt(n)/sigma + norm.ppf(1-alpha_t/2) )-
	norm.cdf( D_bar * sqrt(n)/sigma - norm.ppf(1-alpha_t/2) ),
	0, 1)

	def eq_test(D_bar, sigma_hat, n):
	return abs(D_bar) <= Delta - norm.ppf(1-alpha_eq/2)*sigma_hat / sqrt(n)

	def t_test(D_bar, sigma_hat, n):
	return abs(D_bar) <= norm.ppf(1-alpha_t/2)*sigma_hat / math.sqrt(n)

	# symmetric in alpha and beta, works for both t-test and equivalence test
	def calc_n(sigma):
	return (norm.ppf(1-alpha_eq/2)+norm.ppf(1-beta_eq/2))*2(sigma2)/Delta2

	if __name__ == '__main__':

	use_tensorflow = importlib.util.find_spec("tensorflow") is not None and \
	not force_no_tensorflow

	def more_or_less_stable_linspace(steps):
	return np.sort(np.unique(np.append(np.round(np.linspace(
	-0.02, 0.02, steps + 1), decimals=x_step_decimals),
	np.array([-Delta, 0, Delta]
	))))

	x_sim = more_or_less_stable_linspace(x_steps_sim)
	x = more_or_less_stable_linspace(x_steps_analytic)

	sigma_to_use_len = len(sigma_to_use)
	reference_index = sigma_to_use.index(sigma_hat)

	t0 = time.time()

	df_sim = []

	class Mode(Enum):
	T_TEST = 1
	EQ_TEST = 2

	if use_tensorflow:
	import os
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
	# os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
	import tensorflow as tf

	tf.random.set_seed(0)

	for mode in Mode:
	sr_sim = []
	for sigma in sigma_to_use:
	n = ceil(calc_n(sigma))
	sr_sim_ = []
	for x_ in x_sim:
	sim_mean, sim_var = tf.nn.moments(tf.random.normal(
	(simsize, n), x_, sigma_hat
	), axes=[1])
	sim_std = tf.sqrt(sim_var)

	sr_sim_ += [
	tf.reduce_mean(tf.cast((
	eq_test if mode == Mode.EQ_TEST else t_test
	)(
	sim_mean, sim_std, n
	), tf.float32), axis=0).numpy()
	]

	sr_sim += [pd.Series(
	sr_sim_, index=x_sim, name="_hidden"
	)]

	df_sim += [pd.concat(sr_sim, axis=1)]
	else:
	np.random.seed(0)

	def worker(args):
	mode, sigma = args
	n = ceil(calc_n(sigma))
	sr_sim_ = []
	for x_ in x_sim:
	v = np.random.normal(x_, sigma_hat, simsize*n).reshape((
	simsize, n
	))
	sim_mean, sim_std = v.mean(axis=1), v.std(axis=1)

	sr_sim_ += [(eq_test if mode == Mode.EQ_TEST else t_test)(
	sim_mean, sim_std, n
	).mean()]

	return pd.Series(
	sr_sim_, index=x_sim, name="_hidden"
	)

	with Pool() as p:
	df_sim = [pd.concat(v.get(), axis=1) for v in [p.map_async(
	worker, zip(repeat(mode), sigma_to_use)) for mode in Mode
	]]

	df_sim = pd.concat(df_sim, axis=1, keys=Mode)

	print(f'simulation duration %.4fs'%(time.time()-t0))

	fig, axes = plt.subplots(2, 2)

	fig.set_figwidth(fig.get_figwidth()*1.5)

	mid = (fig.subplotpars.right + fig.subplotpars.left)/2
	fig.suptitle(
	f'Test Success Chance: Different Sample Sizes'+
	'/Sample Size Calculations (SSC), '+r'$\Delta=d_r=1.0$'+'%.\n'+
	' Analytic (Lines) and Normal Distribution Simulation '+
	f'(Markers: +, Runs: {simsize})',
	ha='center'
	)

	reference_style = '-'

	styles = [reference_style]*sigma_to_use_len
	styles[reference_index] = reference_style

	transparency = 0.7

	color_wrong = 'tab:red'
	color_right = 'tab:blue'

	def make_common_axes(ax, yticklabel, y2label, xlabel):
	ax.xaxis.set_major_formatter(mtick.PercentFormatter(1.0, decimals=1))
	ax.set_xlabel(xlabel)

	if yticklabel:
	ax.yaxis.set_major_formatter(
	mtick.PercentFormatter(1.0, decimals=0
	))
	ax.set_ylabel(f"Success Chance")
	else:
	ax.set_yticklabels([])

	secundary_yticks=[alpha_eq/2, 1-beta_eq]
	assert(secundary_yticks == [alpha_t/2, 1-beta_t])

	secundary_yaxis = ax.secondary_yaxis('right')
	secundary_yaxis.set_yticks(secundary_yticks)

	secundary_yaxis.set_yticklabels([
	r'$\alpha_{\mathregular{eq․–test}}/2=$'+'\n'+
	r'$\beta_{\mathregular{t–test}}/2=$'+'\n'+
	'%.1f%%'%(alpha_eq/2*100)+
	'\n',

	'\n'+
	r'$1-\beta_{\mathregular{eq․–test}}=$'+'\n'+
	r'$1-\alpha_{\mathregular{t–test}}=$'+'\n'+
	'%.0f%%'%((1-beta_eq)*100)
	] if y2label else [])

	xlim = (-0.017, 0.017)

	y_lower_bound = 0
	ylim = (-0.03+y_lower_bound, 1.03)

	ax.set_xlim(xlim)
	ax.set_ylim(ylim)

	linewidth = 0.75
	grid_linewidth = linewidth

	thickgrid = {
	"color": matplotlib.rcParams['grid.color'],
	"linewidth": grid_linewidth*3,
	"zorder": -2
	}

	for lim in [-Delta, 0, Delta]:
	ax.plot([lim, lim], ylim, **thickgrid)

	for y in secundary_yticks:
	ax.plot(xlim, (y,y), **thickgrid)

	ax.grid()
	ax.set_axisbelow(True)
	legend = ax.get_legend()
	if legend is not None:
	legend.remove()

	mean_error_mu_text = r"Actual Error $\mu$ " + '\n'

	# t-test ===================================================================
	ax_ttest = axes[0,0]

	lower = None
	upper = None

	sr = []

	for sigma in sigma_to_use:
	n = ceil(calc_n(sigma))
	y = success_t_test(
	x, sigma_hat, n
	)
	if sigma == sigma_hat / 2: upper = y
	if sigma == sigma_hat: lower = y
	sr += [pd.Series(
	y, index=x,
	name=r'$\sigma=$'+('% 5.1f%%'%(sigma*100))[1:].replace(' ', ' ')+
	r' $\Rightarrow$ $n=$'+('% 5d'%n)[1:].replace(' ', ' ')+
	r', $\sigma/\hat{\sigma}=$'+'%.3f'%(sigma/sigma_hat)
	)]

	for swap, section in [
	[False, x <= -Delta],
	[True, np.abs(x) <= Delta],
	[False, x >= Delta]
	]:
	ax_ttest.fill_between(
	x, lower, upper, alpha=transparency/2, where=section,
	color=color_right if swap else color_wrong, linewidth=0.0
	)

	pd.concat(sr, axis=1).plot(
	ax=ax_ttest, color=colors, alpha=transparency, style=styles
	)
	df_sim[Mode.T_TEST].plot(
	ax=ax_ttest, linestyle='none', marker='+',
	alpha=transparency, color=colors, legend=False
	)

	make_common_axes(
	ax_ttest, True, False, mean_error_mu_text + (
	r'In the t-Test, $\alpha_{\mathregular{t–test}}$ stays fixed.'+'\n'
	'(Red, Blue) Areas: additionally (incorrectly, correctly)\n'+
	'accepted/rejected'+
	r' when not accounting for $\beta_{\mathregular{t–test}}$ in SSC.'
	))

	# equivalence test =========================================================
	ax_eqtest = axes[0,1]

	pd.concat([pd.Series(success_eq_test(
	x, sigma_hat, ceil(calc_n(sigma))
	), index=x) for sigma in sigma_to_use], axis=1).plot(
	ax=ax_eqtest, color=colors, alpha=transparency, style=styles
	)
	df_sim[Mode.EQ_TEST].plot(
	ax=ax_eqtest, linestyle='none', marker='+',
	alpha=transparency, color=colors, legend=False
	)

	make_common_axes(
	ax_eqtest, False, True,
	mean_error_mu_text +
	r'In the Equivalence Test '+
	r'$\alpha_{\mathregular{eq․–test}}$ stays fixed.'+(
	'\nAnalytic 0% Test Success Chance for '+
	r'$\sigma$ ≤ '+('%.1f%%'%(sigma_zero_chance*100)) + '.' if \
	alpha_eq == beta_eq else ''
	))

	# reference ================================================================
	ax_ref = axes[1,0]

	df_to_use = pd.Series(
	success_eq_test(x, sigma_hat, ceil(calc_n(sigma_hat))), index=x
	)
	df_to_use.plot(
	ax=ax_ref, color=reference_color,
	alpha=transparency, style=reference_style
	)
	for swap, section in [
	[False, x <= -Delta],
	[True, np.abs(x) <= Delta],
	[False, x >= Delta]
	]:
	ax_ref.fill_between(
	x, np.ones_like(x), df_to_use, alpha=transparency,
	where=section, color=color_wrong if swap else color_right,
	linewidth=0.0
	)
	ax_ref.fill_between(
	x, np.zeros_like(x), df_to_use, alpha=transparency,
	where=section, color=color_right if swap else color_wrong,
	linewidth=0.0
	)

	make_common_axes(
	ax_ref, True, True, mean_error_mu_text +
	'Golden Line: Perfect Sample Size Estimation,\n'+
	'(Red, Blue) Areas: (incorrectly, correctly) accepted/rejected.'
	)

	# legend ===================================================================
	legend_axis=axes[1,1]
	legend_axis.legend(
	*axes[0,0].get_legend_handles_labels(),
	title=' Real Standard Deviation '+
	r'$\hat{\sigma}=$'+('%.0f%%'%(sigma_hat*100))+',\n'+
	'Standard Deviation '+r'$\sigma$'+' used for SSC:',
	bbox_to_anchor = [1.03, 1.15]
	)
	legend_axis.axis('off')

	fig.subplots_adjust(
	hspace=0.7, wspace=0.05, left=0.09, right=0.9, top=0.89, bottom=0.05
	)
	# fig.savefig('t_test_vs_equivalence_test.pdf', bbox_inches='tight')
	fig.savefig('t_test_vs_equivalence_test.png', dpi=300, bbox_inches='tight')