# marskar/test-as-linear.py

## test-as-linear.py
import numpy as np
import pandas as pd
from statsmodels.formula.api import ols
from scipy.stats import ttest_ind, mannwhitneyu

def permut_concat(iterable):
    """Join a sequence of arrays end to end and return them randomly permuted.

    Parameters
    ----------
    iterable : sequence of array_like
        Arrays to join; passed unchanged to ``np.concatenate``.

    Returns
    -------
    numpy.ndarray
        The joined array after ``np.random.permutation``, which draws from
        NumPy's global random state.
    """
    joined = np.concatenate(iterable)
    shuffled = np.random.permutation(joined)
    return shuffled

# Make two zero-inflated distributions: a, b (200 draws each)
arra = permut_concat([np.zeros(50), np.random.normal(14, 4, 150)])
arrb = permut_concat([np.zeros(100), np.random.normal(20, 5, 100)])

# Make a DataFrame with one column per sample
df = pd.DataFrame({"a": arra, "b": arrb})

# Convert from wide to long format: columns "variable" (a/b) and "value"
df_long = df.melt()

# Binary encode "variable" and rank "value" across the pooled samples
df_long = df_long.assign(
    variable = df_long.variable.map({"a": 0, "b": 1}),
    rank = df_long.value.rank()
)


def _report(test, formula, p_test, p_model):
    """Print the classical test's p-value next to the linear model's."""
    print(
        f"The p-value obtained from the {test} is {p_test}",
        f"The p-value obtained from the {formula} linear model is {p_model}",
        sep="\n"
    )


# Student's t-test vs. OLS on the group dummy (both assume equal variances).
test = "Independent T test"
formula = "value ~ 1 + variable"
t, p = ttest_ind(df["a"], df["b"])
res = ols(formula, df_long).fit()
_report(test, formula, p, res.f_pvalue)

# Mann-Whitney U vs. the same OLS fitted on the pooled ranks.
test = "Mann-Whitney U test"
formula = "rank ~ 1 + variable"
u, p = mannwhitneyu(df["a"], df["b"], alternative="two-sided")
res = ols(formula, df_long).fit()
_report(test, formula, p, res.f_pvalue)

# Welch's t-test drops the equal-variance assumption, so a plain OLS fit
# (identical to the Student section above) is not its counterpart.
# HC2 heteroscedasticity-robust errors are: with a single 0/1 regressor the
# HC2 standard error of the slope equals Welch's sqrt(s_a^2/n_a + s_b^2/n_b).
# The model still uses n - 2 residual degrees of freedom instead of the
# Welch-Satterthwaite estimate, so the two p-values agree closely rather
# than to the last digit.
test = "Welch's T test"
formula = "value ~ 1 + variable"
t, p = ttest_ind(df["a"], df["b"], equal_var=False)
res = ols(formula, df_long).fit(cov_type="HC2", use_t=True)
_report(test, formula, p, res.pvalues["variable"])
	import numpy as np
	import pandas as pd
	from statsmodels.formula.api import ols
	from scipy.stats import ttest_ind, mannwhitneyu

	def permut_concat(iterable):
		"""Concatenate iterables of arrays then randomize.

		The arrays in ``iterable`` are joined with ``np.concatenate`` and the
		result is returned after ``np.random.permutation``, which draws from
		NumPy's global random state.
		"""
		return np.random.permutation(np.concatenate(iterable))

	# Make two zero-inflated distributions: a, b (200 draws each)
	arra = permut_concat([np.zeros(50), np.random.normal(14, 4, 150)])
	arrb = permut_concat([np.zeros(100), np.random.normal(20, 5, 100)])

	# Make a DataFrame with one column per sample
	df = pd.DataFrame({"a": arra, "b": arrb})

	# Convert from wide to long format: columns "variable" (a/b) and "value"
	df_long = df.melt()

	# Binary encode "variable" and rank "value" across the pooled samples
	df_long = df_long.assign(
		variable = df_long.variable.map({"a": 0, "b": 1}),
		rank = df_long.value.rank()
	)


	def _report(test, formula, p_test, p_model):
		"""Print the classical test's p-value next to the linear model's."""
		print(
			f"The p-value obtained from the {test} is {p_test}",
			f"The p-value obtained from the {formula} linear model is {p_model}",
			sep="\n"
		)


	# Student's t-test vs. OLS on the group dummy (both assume equal variances).
	test = "Independent T test"
	formula = "value ~ 1 + variable"
	t, p = ttest_ind(df["a"], df["b"])
	res = ols(formula, df_long).fit()
	_report(test, formula, p, res.f_pvalue)

	# Mann-Whitney U vs. the same OLS fitted on the pooled ranks.
	test = "Mann-Whitney U test"
	formula = "rank ~ 1 + variable"
	u, p = mannwhitneyu(df["a"], df["b"], alternative="two-sided")
	res = ols(formula, df_long).fit()
	_report(test, formula, p, res.f_pvalue)

	# Welch's t-test drops the equal-variance assumption, so a plain OLS fit
	# (identical to the Student section above) is not its counterpart.
	# HC2 heteroscedasticity-robust errors are: with a single 0/1 regressor the
	# HC2 standard error of the slope equals Welch's sqrt(s_a^2/n_a + s_b^2/n_b).
	# The model still uses n - 2 residual degrees of freedom instead of the
	# Welch-Satterthwaite estimate, so the two p-values agree closely rather
	# than to the last digit.
	test = "Welch's T test"
	formula = "value ~ 1 + variable"
	t, p = ttest_ind(df["a"], df["b"], equal_var=False)
	res = ols(formula, df_long).fit(cov_type="HC2", use_t=True)
	_report(test, formula, p, res.pvalues["variable"])