# gr33ndata/Permutation Testing

## Permutation Testing
# Better run this in a Jupyter notebook

import numpy as np
import pandas as pd

green = [32, 34, 38, 28, 32, 34, 38, 28, 33, 50, 32, 39, 29]
red = [33, 32, 39, 29, 33, 32, 39, 29, 33, 8, 32, 39, 29]

green = pd.Series(green)
red = pd.Series(red)

# Get the mean values of the two groups
green_mean, red_mean = green.mean(), red.mean()

# Calculate the observed difference in means (the test statistic)
original_diff = green_mean - red_mean
print(original_diff)

# Permutation Test
# Under the null hypothesis the group labels are exchangeable, so we pool
# both samples, shuffle them, re-split into groups of the original sizes and
# record the difference in means for every shuffle.
n_permutations = 6000
n_green = len(green)
pooled = pd.concat([green, red]).to_numpy()

# A single seeded generator keeps the run reproducible; shuffling a plain
# NumPy array avoids building new pandas objects on every iteration.
rng = np.random.default_rng(0)

diffs = []
for _ in range(n_permutations):
    shuffled = rng.permutation(pooled)
    diffs.append(shuffled[:n_green].mean() - shuffled[n_green:].mean())

# One-sided test: flag the permutations whose difference is at least as
# large as the observed one.
refuse_null_hype = [diff >= original_diff for diff in diffs]

# P Values
# `p_value` is the significance threshold; the estimated p-value is the
# fraction of permutations flagged above.
p_value = 0.05
estimated_p_value = pd.Series(refuse_null_hype).mean()
print(estimated_p_value)
print(
    'Can we refuse the null hypothesis?',
    estimated_p_value <= p_value
)

# Plot Results
# `plt` is used below but was never imported; import it here so this
# section runs on its own.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1, figsize=(14, 8))

# Density estimate of the differences collected in `diffs`.
pd.Series(diffs).plot(
    title='Diff',
    kind='kde',
    ax=ax,
)

# Vertical line at the observed difference for comparison with the curve.
ax.axvline(original_diff)

# plt.show() works both in notebooks and in plain scripts, whereas
# Figure.show() does not run an event loop and warns on non-GUI backends.
plt.show()
	# Better run this in a Jupyter notebook

import numpy as np
import pandas as pd

green = [32, 34, 38, 28, 32, 34, 38, 28, 33, 50, 32, 39, 29]
red = [33, 32, 39, 29, 33, 32, 39, 29, 33, 8, 32, 39, 29]

green = pd.Series(green)
red = pd.Series(red)

# Get the mean values of the two groups
green_mean, red_mean = green.mean(), red.mean()

# Calculate the observed difference in means (the test statistic)
original_diff = green_mean - red_mean
print(original_diff)

# Permutation Test
# Under the null hypothesis the group labels are exchangeable, so we pool
# both samples, shuffle them, re-split into groups of the original sizes and
# record the difference in means for every shuffle.
n_permutations = 6000
n_green = len(green)
pooled = pd.concat([green, red]).to_numpy()

# A single seeded generator keeps the run reproducible; shuffling a plain
# NumPy array avoids building new pandas objects on every iteration.
rng = np.random.default_rng(0)

diffs = []
for _ in range(n_permutations):
    shuffled = rng.permutation(pooled)
    diffs.append(shuffled[:n_green].mean() - shuffled[n_green:].mean())

# One-sided test: flag the permutations whose difference is at least as
# large as the observed one.
refuse_null_hype = [diff >= original_diff for diff in diffs]

# P Values
# `p_value` is the significance threshold; the estimated p-value is the
# fraction of permutations flagged above.
p_value = 0.05
estimated_p_value = pd.Series(refuse_null_hype).mean()
print(estimated_p_value)
print(
    'Can we refuse the null hypothesis?',
    estimated_p_value <= p_value
)

	# Plot Results
# `plt` is used below but was never imported; import it here so this
# section runs on its own.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1, figsize=(14, 8))

# Density estimate of the differences collected in `diffs`.
pd.Series(diffs).plot(
    title='Diff',
    kind='kde',
    ax=ax,
)

# Vertical line at the observed difference for comparison with the curve.
ax.axvline(original_diff)

# plt.show() works both in notebooks and in plain scripts, whereas
# Figure.show() does not run an event loop and warns on non-GUI backends.
plt.show()