Skip to content

Instantly share code, notes, and snippets.

@gr33ndata
Created June 14, 2020 07:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gr33ndata/d25b7cda34836c49db56d3095497c498 to your computer and use it in GitHub Desktop.
Save gr33ndata/d25b7cda34836c49db56d3095497c498 to your computer and use it in GitHub Desktop.
Permutation Testing for my YouTube video about Hypothesis Testing
# Better run this in a Jupyter notebook
import numpy as np
import pandas as pd
green = [32, 34, 38, 28, 32, 34, 38, 28, 33, 50, 32, 39, 29]
red = [33, 32, 39, 29, 33, 32, 39, 29, 33, 8, 32, 39, 29]
green = pd.Series(green)
red = pd.Series(red)


def permutation_diffs(group_a, group_b, n_permutations=6000):
    """Return the mean differences obtained by shuffling the group labels.

    The two groups are pooled once; each permutation shuffles the pool,
    assigns the first ``len(group_a)`` values to the first group and the
    remaining values to the second, and records
    ``mean(first) - mean(second)``.

    Args:
        group_a: pandas Series holding the first group's observations.
        group_b: pandas Series holding the second group's observations.
        n_permutations: Number of shuffles. The shuffle index is used as
            the ``random_state`` seed, so repeated calls give equal results.

    Returns:
        A list with one mean difference per permutation (empty when
        ``n_permutations`` is not positive).

    Raises:
        ValueError: If either group is empty.
    """
    split = len(group_a)
    if split == 0 or len(group_b) == 0:
        raise ValueError('both groups must contain at least one observation')

    # Pool once, outside the loop: the pooled values never change, and the
    # original groups must not be overwritten by the shuffled halves.
    pooled = pd.concat([group_a, group_b])

    diffs = []
    for seed in range(n_permutations):
        shuffled = pooled.sample(frac=1, replace=False, random_state=seed)
        # Split by the real group sizes so unequal groups are handled and
        # no observation is dropped when the pooled length is odd.
        diffs.append(
            shuffled.iloc[:split].mean() - shuffled.iloc[split:].mean()
        )
    return diffs


def plot_diffs(diffs, observed_diff):
    """Plot the density of the permuted differences with the observed one.

    Args:
        diffs: Iterable of mean differences from the permutation test.
        observed_diff: Mean difference of the unshuffled groups; drawn as a
            vertical line on the plot.

    Returns:
        A ``(fig, ax)`` tuple for the created plot.
    """
    # Passing figsize lets pandas create the figure itself, so this script
    # does not need its own matplotlib import.
    ax = pd.Series(diffs).plot(
        title='Diff',
        kind='kde',
        figsize=(14, 8)
    )
    ax.axvline(observed_diff)
    return ax.get_figure(), ax


if __name__ == "__main__":
    # Get the mean values of the two groups
    print(green.mean(), red.mean())

    # Calculate the difference between the means
    original_diff = green.mean() - red.mean()
    print(original_diff)

    # Permutation Test (one-sided: counts shuffles whose difference is at
    # least as large as the observed one)
    diffs = permutation_diffs(green, red)
    refuse_null_hype = [diff >= original_diff for diff in diffs]

    # P Values
    # p_value is the significance threshold that the observed p-value
    # (the share of shuffles at least as extreme) is compared against.
    p_value = 0.05
    observed_p_value = pd.Series(refuse_null_hype).mean()
    print(observed_p_value)
    print(
        'Can we refuse the null hypothesis?',
        observed_p_value <= p_value
    )

    # Plot Results
    fig, ax = plot_diffs(diffs, original_diff)
    fig.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment