Skip to content

Instantly share code, notes, and snippets.

@eliorc
Last active January 14, 2019 15:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eliorc/bd8e1beceb29f9d4da9b362f65c15d51 to your computer and use it in GitHub Desktop.
Save eliorc/bd8e1beceb29f9d4da9b362f65c15d51 to your computer and use it in GitHub Desktop.
Permutation T-test (statistical)
import numpy as np
from scipy import stats
def permutation_t_test(sample1: list, sample2: list, n: int = 10000) -> float:
    """
    Conduct a permutation T-test using n iterations and return a two-tailed p-value.

    The pooled observations are shuffled n times and split into two groups that have the same sizes as sample1 and
    sample2; a two-sample T statistic (unequal variances) is computed for every split. The T statistic of the
    original samples is then standardised against the mean and standard deviation of the permuted statistics and
    the p-value is read from a standard normal distribution. The higher the p-value, the more likely that sample1
    and sample2 are sampled from the same distribution.

    Randomness comes from NumPy's global random state, so ``np.random.seed`` makes the result reproducible.

    :param sample1: Sample of continuous observations (list, tuple or 1-D array of numbers)
    :param sample2: Sample of continuous observations (list, tuple or 1-D array of numbers)
    :param n: Number of times to sample T statistics from random splits of sample1+sample2
    :return: Two-tailed p-value
    """
    first = np.asarray(sample1, dtype=float)
    second = np.asarray(sample2, dtype=float)

    # Concatenate explicitly: `+` joins lists only, while on NumPy arrays it adds element-wise
    population = np.concatenate([first, second])
    sample_size = len(second)

    permuted_t_scores = []
    for _ in range(n):
        # One shuffle yields both groups; the first `sample_size` items play the role of sample2
        shuffled = np.random.permutation(population)
        permuted_t_scores.append(stats.ttest_ind(a=shuffled[sample_size:],
                                                 b=shuffled[:sample_size],
                                                 equal_var=False)[0])  # 0 is the T statistic

    sample_t_score = stats.ttest_ind(a=first,
                                     b=second,
                                     equal_var=False)[0]

    # Z-score of the original T statistic relative to the permuted T statistics.
    # NOTE: this treats the permutation distribution as approximately normal, which is an approximation
    # and not a guarantee.
    permuted_t_scores = np.array(permuted_t_scores)
    z_score = (sample_t_score - permuted_t_scores.mean()) / permuted_t_scores.std()

    return float(stats.norm.sf(abs(z_score)) * 2)  # Two tailed p-value
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment