Skip to content

Instantly share code, notes, and snippets.

@camriddell
Last active March 7, 2023 00:51
Show Gist options
  • Save camriddell/d160a62a87713285afd82a53da3ca879 to your computer and use it in GitHub Desktop.
Save camriddell/d160a62a87713285afd82a53da3ca879 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
from pathlib import Path
from numpy import sqrt, loadtxt, isclose
from pandas import read_table
from scipy.stats import t
def ttest(sample1, sample2, ddof=None):
    """Return the two-tailed p-value of an unequal-variance (Welch) t-test.

    Parameters
    ----------
    sample1, sample2
        Objects exposing ``.var()``, ``.mean()`` and ``.size`` (for example
        ``numpy.ndarray`` or ``pandas.Series``).  The samples may have
        different sizes and different variances.
    ddof : int, optional
        Delta degrees of freedom forwarded to each sample's ``.var()``.
        When ``None`` (the default) ``.var()`` is called with no arguments,
        so the container's own default applies: ``numpy.ndarray.var`` uses
        ``ddof=0`` while ``pandas.Series.var`` uses ``ddof=1``, which means
        the same numbers give different p-values depending on how they were
        loaded.  Pass ``ddof=1`` to get the textbook Welch test regardless
        of the container type.

    Returns
    -------
    float
        Two-tailed p-value for the null hypothesis of equal means.
    """
    samples = (sample1, sample2)
    if ddof is None:
        # Backward-compatible path: defer to the container's default ddof.
        variances = [s.var() for s in samples]
    else:
        variances = [s.var(ddof=ddof) for s in samples]

    # Variance of each sample mean (s_i^2 / n_i).
    vn1, vn2 = [v / s.size for v, s in zip(variances, samples)]

    # Standard error of the difference in means; nothing is pooled because
    # the two variances are not assumed to be equal.
    std_err = sqrt(vn1 + vn2)

    # Welch-Satterthwaite approximation of the degrees of freedom.
    dof = (vn1 + vn2) ** 2 / (
        vn1 ** 2 / (sample1.size - 1) + vn2 ** 2 / (sample2.size - 1)
    )

    # Two-tailed test: double the lower-tail probability of -|t|.
    test_statistic = (sample1.mean() - sample2.mean()) / std_err
    p_value = t.cdf(-abs(test_statistic), df=dof) * 2
    return p_value
# Directory holding the input file; data.txt must provide at least two columns.
data_dir = Path('data')

# Pass 1: load with NumPy, so each column handed to ttest is an ndarray.
raw_data = loadtxt(data_dir / 'data.txt')
sample1 = raw_data[:, 0]
sample2 = raw_data[:, 1]
numpy_p_value = ttest(sample1, sample2)

# Pass 2: load the very same file with pandas, so each column is a Series.
raw_data = read_table(data_dir / 'data.txt', sep=' ', header=None)
sample1 = raw_data.iloc[:, 0]
sample2 = raw_data.iloc[:, 1]
pandas_p_value = ttest(sample1, sample2)

# The p-values are expected to disagree even though the numbers are the same:
# ttest relies on each container's .var(), and ndarray.var defaults to ddof=0
# whereas Series.var defaults to ddof=1.
assert not isclose(numpy_p_value, pandas_p_value)
#!/usr/bin/env python3
from itertools import combinations
from pandas import read_csv, DataFrame, cut
from scipy.stats import ttest_ind
# Load the data set; each column is used as one sample below.
data = read_csv('data.csv')

# Run an independent two-sample t-test for every unordered pair of columns
# and collect one result row per pair, indexed by the two column names.
results = (
    DataFrame.from_dict(
        {
            (col1, col2): ttest_ind(data[col1], data[col2])._asdict()
            for col1, col2 in combinations(data, r=2)
        },
        orient='index',
    )
    .rename_axis(['lhs', 'rhs'])
    .assign(
        # Map each p-value onto a significance marker.  Bins are closed on
        # the right: (0, .0001] -> '***', (.0001, .001] -> '**',
        # (.001, .05] -> '*', (.05, .07] -> '~', everything above -> ''.
        # include_lowest=True also closes the first bin on the left, so a
        # p-value that underflows to exactly 0.0 is marked '***' instead of
        # being left unlabelled (NaN).
        significance=lambda d: cut(
            d['pvalue'],
            bins=[0, .0001, .001, .05, .07, float('inf')],
            labels=['***', '**', '*', '~', ''],
            include_lowest=True,
        ),
    )
)
#!/usr/bin/env python3
from pathlib import Path
from pandas import read_csv
from statsmodels.formula.api import ols
# Read the member records from data/data.csv.
data_dir = Path('data')
member_data = read_csv(data_dir / 'data.csv')

# Ordinary least squares of monthly_spend on income, with income passed
# through the formula-level standardize() transform.
model = ols(formula='monthly_spend ~ standardize(income)', data=member_data)
fit = model.fit()

# The script requires the fitted coefficient on the standardized income term
# to be negative.
income_coefficient = fit.params['standardize(income)']
assert income_coefficient < 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment