eliorc/permutation_t_test.py

## permutation_t_test.py
import numpy as np
from scipy import stats


def permutation_t_test(sample1: list, sample2: list, n: int = 10000) -> float:
    """
    Estimate a two-tailed p-value for two samples using a permutation T-test.

    The pooled observations are randomly split ``n`` times into two groups of sizes ``len(sample1)`` and
    ``len(sample2)``, and a two-sample T-statistic (unequal variances) is computed for every split. The
    T-statistic of the original samples is then standardised against the mean and standard deviation of those
    permuted statistics, and the resulting Z-score is turned into a p-value with the normal survival function.
    The higher the p-value, the more likely that sample1 and sample2 are sampled from the same distribution.

    :param sample1: Sample of continuous observations (one-dimensional list, tuple or numpy array)
    :param sample2: Sample of continuous observations (one-dimensional list, tuple or numpy array)
    :param n: Number of times to sample T-statistics from randomly selected samples taken from sample1+sample2
    :return: Two-tailed p-value
    :raises ValueError: If n is smaller than 1
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    # Pool explicitly: `sample1 + sample2` would add numpy arrays element-wise instead of joining them
    population = np.concatenate([np.asarray(sample1), np.asarray(sample2)])  # Total samples
    sample_size = len(sample2)

    t_test_results = np.empty(n)
    in_random_sample = np.zeros(len(population), dtype=bool)  # Reused membership mask
    for iteration in range(n):
        # Pick random indices
        random_indices = np.random.choice(len(population), sample_size, replace=False)

        # A boolean mask splits the population in one pass and keeps the original element order in both groups
        in_random_sample[:] = False
        in_random_sample[random_indices] = True

        # Two sample t-test
        t_test_results[iteration] = stats.ttest_ind(a=population[~in_random_sample],
                                                    b=population[in_random_sample],
                                                    equal_var=False)[0]  # 0 is the T statistic

    sample_t_score = stats.ttest_ind(a=sample1,
                                     b=sample2,
                                     equal_var=False)[0]

    # Z-score of the original T-statistic within the permuted T-statistics.
    # NOTE: this treats the permuted T-statistics as approximately normal, which is an assumption, not a guarantee
    z_score = (sample_t_score - t_test_results.mean()) / t_test_results.std()

    return stats.norm.sf(abs(z_score)) * 2  # Two tailed p-value
	import numpy as np
	from scipy import stats


	def permutation_t_test(sample1: list, sample2: list, n: int = 10000) -> float:
	    """
	    Estimate a two-tailed p-value for two samples using a permutation T-test.

	    The pooled observations are randomly split ``n`` times into two groups of sizes ``len(sample1)`` and
	    ``len(sample2)``, and a two-sample T-statistic (unequal variances) is computed for every split. The
	    T-statistic of the original samples is then standardised against the mean and standard deviation of those
	    permuted statistics, and the resulting Z-score is turned into a p-value with the normal survival function.
	    The higher the p-value, the more likely that sample1 and sample2 are sampled from the same distribution.

	    :param sample1: Sample of continuous observations (one-dimensional list, tuple or numpy array)
	    :param sample2: Sample of continuous observations (one-dimensional list, tuple or numpy array)
	    :param n: Number of times to sample T-statistics from randomly selected samples taken from sample1+sample2
	    :return: Two-tailed p-value
	    :raises ValueError: If n is smaller than 1
	    """
	    if n < 1:
	        raise ValueError("n must be at least 1")

	    # Pool explicitly: `sample1 + sample2` would add numpy arrays element-wise instead of joining them
	    population = np.concatenate([np.asarray(sample1), np.asarray(sample2)])  # Total samples
	    sample_size = len(sample2)

	    t_test_results = np.empty(n)
	    in_random_sample = np.zeros(len(population), dtype=bool)  # Reused membership mask
	    for iteration in range(n):
	        # Pick random indices
	        random_indices = np.random.choice(len(population), sample_size, replace=False)

	        # A boolean mask splits the population in one pass and keeps the original element order in both groups
	        in_random_sample[:] = False
	        in_random_sample[random_indices] = True

	        # Two sample t-test
	        t_test_results[iteration] = stats.ttest_ind(a=population[~in_random_sample],
	                                                    b=population[in_random_sample],
	                                                    equal_var=False)[0]  # 0 is the T statistic

	    sample_t_score = stats.ttest_ind(a=sample1,
	                                     b=sample2,
	                                     equal_var=False)[0]

	    # Z-score of the original T-statistic within the permuted T-statistics.
	    # NOTE: this treats the permuted T-statistics as approximately normal, which is an assumption, not a guarantee
	    z_score = (sample_t_score - t_test_results.mean()) / t_test_results.std()

	    return stats.norm.sf(abs(z_score)) * 2  # Two tailed p-value