larsoner/check_roi_selection.py

## check_roi_selection.py
# Check 1-tailed t-test bias when selecting ROIs using different methods
import numpy as np
from scipy.stats import ttest_ind

rng = np.random.default_rng(0)

n_subjects = 20  # number of subjects to simulate
n_sensors = 100  # number of "sensors" (could be source points, etc.)
n_run = 5000  # number of times to simulate (n_subjects, n_sensors) values
prop_false = np.zeros((n_run, 3, 2))  # proportion of false alarms
n_roi = int(round(0.05 * n_sensors))  # number of sensors to choose for ROI (5% here)
for ri in range(n_run):
    # Simulate data from n_subjects subjects and n_sensors sensors that is pure
    # noise
    A = rng.normal(size=(n_subjects, n_sensors))
    B = rng.normal(size=(n_subjects, n_sensors))
    mean_sum = A.mean(axis=0) + B.mean(axis=0)
    mean_diff = A.mean(axis=0) - B.mean(axis=0)
    assert mean_sum.shape == mean_diff.shape == (n_sensors,)
    for ai, average_sensors in enumerate((False, True)):
        # 1. naive: just compute all stats
        this_A, this_B = A, B
        if average_sensors:
            this_A, this_B = this_A.mean(-1), this_B.mean(-1)
        _, p = ttest_ind(this_A, this_B, alternative='greater')
        prop_false[ri, 0, ai] = (p < 0.05).sum() / p.size

        # 2. take top 5% of mean activation sum then compute stats on difference
        roi = np.argsort(mean_sum)[::-1][:n_roi]
        this_A, this_B = A[:, roi], B[:, roi]
        if average_sensors:
            this_A, this_B = this_A.mean(-1), this_B.mean(-1)
        _, p = ttest_ind(this_A, this_B, axis=0, alternative='greater')
        prop_false[ri, 1, ai] = (p < 0.05).sum() / p.size
        # 3. take top 5% of mean activate difference then compute stats on difference
        roi = np.argsort(mean_diff)[::-1][:n_roi]
        this_A, this_B = A[:, roi], B[:, roi]
        if average_sensors:
            this_A, this_B = this_A.mean(-1), this_B.mean(-1)
        _, p = ttest_ind(this_A, this_B, axis=0, alternative='greater')
        prop_false[ri, 2, ai] = (p < 0.05).sum() / p.size

with np.printoptions(precision=2, suppress=True):
    print('False alarm percent (should be 5%)')
    print('Left: sensors independently tested; right: averaged across them')
    for ki, kind in enumerate(('naive', 'sum', 'diff')):
        print(f'- {kind}: {100 * prop_false.mean(axis=0)[ki]}')
	# Check 1-tailed t-test bias when selecting ROIs using different methods
	import numpy as np
	from scipy.stats import ttest_ind

	rng = np.random.default_rng(0)

	n_subjects = 20 # number of subjects to simulate
	n_sensors = 100 # number of "sensors" (could be source points, etc.)
	n_run = 5000 # number of times to simulate (n_subjects, n_sensors) values
	prop_false = np.zeros((n_run, 3, 2)) # proportion of false alarms
	n_roi = int(round(0.05 * n_sensors)) # number of sensors to choose for ROI (5% here)
	for ri in range(n_run):
	# Simulate data from n_subjects subjects and n_sensors sensors that is pure
	# noise
	A = rng.normal(size=(n_subjects, n_sensors))
	B = rng.normal(size=(n_subjects, n_sensors))
	mean_sum = A.mean(axis=0) + B.mean(axis=0)
	mean_diff = A.mean(axis=0) - B.mean(axis=0)
	assert mean_sum.shape == mean_diff.shape == (n_sensors,)
	for ai, average_sensors in enumerate((False, True)):
	# 1. naive: just compute all stats
	this_A, this_B = A, B
	if average_sensors:
	this_A, this_B = this_A.mean(-1), this_B.mean(-1)
	_, p = ttest_ind(this_A, this_B, alternative='greater')
	prop_false[ri, 0, ai] = (p < 0.05).sum() / p.size

	# 2. take top 5% of mean activation sum then compute stats on difference
	roi = np.argsort(mean_sum)[::-1][:n_roi]
	this_A, this_B = A[:, roi], B[:, roi]
	if average_sensors:
	this_A, this_B = this_A.mean(-1), this_B.mean(-1)
	_, p = ttest_ind(this_A, this_B, axis=0, alternative='greater')
	prop_false[ri, 1, ai] = (p < 0.05).sum() / p.size
	# 3. take top 5% of mean activate difference then compute stats on difference
	roi = np.argsort(mean_diff)[::-1][:n_roi]
	this_A, this_B = A[:, roi], B[:, roi]
	if average_sensors:
	this_A, this_B = this_A.mean(-1), this_B.mean(-1)
	_, p = ttest_ind(this_A, this_B, axis=0, alternative='greater')
	prop_false[ri, 2, ai] = (p < 0.05).sum() / p.size

	with np.printoptions(precision=2, suppress=True):
	print('False alarm percent (should be 5%)')
	print('Left: sensors independently tested; right: averaged across them')
	for ki, kind in enumerate(('naive', 'sum', 'diff')):
	print(f'- {kind}: {100 * prop_false.mean(axis=0)[ki]}')