Created
November 6, 2019 18:06
-
-
Save marskar/a132b64d27a541b5edbbf64bc95d627a to your computer and use it in GitHub Desktop.
Statistical tests are linear models! Sources: Python - https://eigenfoo.xyz/tests-as-linear/ ; R - https://lindeloev.github.io/tests-as-linear/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu, ttest_ind
from statsmodels.formula.api import ols
def permut_concat(iterable):
    """Concatenate iterables of arrays then randomize.

    Parameters
    ----------
    iterable : sequence of array_like
        Arrays to join end to end; handed straight to ``np.concatenate``.

    Returns
    -------
    numpy.ndarray
        A shuffled copy of the concatenated values. The shuffle uses
        ``np.random.permutation``, i.e. NumPy's global random state, so the
        order differs between runs unless that state is seeded by the caller.
    """
    return np.random.permutation(np.concatenate(iterable))
# Make two distributions: a, b
# Each one mixes a run of exact zeros with normal draws and is then shuffled,
# so both samples have the same length (200) but different shapes.
arra = permut_concat([np.zeros(50), np.random.normal(14, 4, 150)])
arrb = permut_concat([np.zeros(100), np.random.normal(20, 5, 100)])

# Make a DataFrame
df = pd.DataFrame().assign(a=arra, b=arrb)

# Convert from wide to long format
# (one row per observation: "variable" holds the source column name,
# "value" holds the observation itself)
df_long = df.melt()

# Binary encode "variable" and rank "value"
# The 0/1 coding lets "variable" act as a group dummy in the formulas used
# further down; "rank" is the rank of each value across both groups pooled.
df_long = df_long.assign(
    variable=df_long.variable.map({"a": 0, "b": 1}),
    rank=df_long.value.rank(),
)
# Student's (equal-variance) t-test versus the equivalent linear model:
# regress the value on an intercept plus the 0/1 group dummy.
test = "Independent T test"
formula = "value ~ 1 + variable"
t, p = ttest_ind(df["a"], df["b"])
res = ols(formula, df_long).fit()
# With a single predictor, the model's overall F-test p-value is the p-value
# of the group effect.
print(
    f"The p-value obtained from the {test} is {p}",
    f"The p-value obtained from the {formula} linear model is {res.f_pvalue}",
    sep="\n",
)
# Mann-Whitney U test versus the same linear model fitted on pooled ranks
# instead of raw values.
# NOTE: the rank regression is an approximation of the U test, so the two
# p-values are expected to be close rather than identical.
test = "Mann-Whitney U test"
formula = "rank ~ 1 + variable"
u, p = mannwhitneyu(df["a"], df["b"], alternative="two-sided")
res = ols(formula, df_long).fit()
print(
    f"The p-value obtained from the {test} is {p}",
    f"The p-value obtained from the {formula} linear model is {res.f_pvalue}",
    sep="\n",
)
# Welch's (unequal-variance) t-test versus a linear model that also drops the
# equal-variance assumption.
test = "Welch's T test"
formula = "value ~ 1 + variable"
t, p = ttest_ind(df["a"], df["b"], equal_var=False)
# A plain OLS fit pools the residual variance and would merely reproduce the
# Student's t-test result. HC2 heteroscedasticity-robust standard errors give
# the group slope the variance s_a^2/n_a + s_b^2/n_b, which is exactly the
# denominator of Welch's statistic; use_t=True keeps a t reference
# distribution instead of the normal one used by default for robust fits.
res = ols(formula, df_long).fit(cov_type="HC2", use_t=True)
# The model uses n - 2 residual degrees of freedom rather than the
# Welch-Satterthwaite approximation, so the p-values agree closely but not
# necessarily to the last digit.
p_lm = res.pvalues["variable"]
print(
    f"The p-value obtained from the {test} is {p}",
    f"The p-value obtained from the {formula} linear model is {p_lm}",
    sep="\n",
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment