# mamelara/null_dist.py

## null_dist.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the mouse body-weight table; the rows are split by the "Diet" column.
DATA_URL = (
    "https://raw.githubusercontent.com/genomicsclass/dagdata/master/"
    "inst/extdata/femaleMiceWeights.csv"
)
SAMPLE_SIZE = 12        # rows drawn for each simulated group
N_SIMULATIONS = 10000   # number of simulated differences

population = pd.read_csv(DATA_URL)

control = population[population["Diet"] == "chow"]
treatment = population[population["Diet"] == "hf"]

# Observed difference in mean body weight (treatment minus control).
obs = treatment["Bodyweight"].mean() - control["Bodyweight"].mean()

# Build the null distribution by repeatedly drawing two random groups from
# the whole table and recording the difference of their mean body weights.
# The simulated groups get their own names so the observed ``control`` and
# ``treatment`` frames above are not overwritten by the loop.
# NOTE(review): the two groups are drawn independently, so the same row can
# appear in both; confirm whether a shuffle-and-split permutation is intended.
null_distribution = []
for _ in range(N_SIMULATIONS):
    # Draw order (control first, then treatment) matches the original so a
    # seeded run produces the same sequence of samples.
    simulated_control = population.sample(SAMPLE_SIZE)
    simulated_treatment = population.sample(SAMPLE_SIZE)
    null_distribution.append(
        simulated_treatment["Bodyweight"].mean()
        - simulated_control["Bodyweight"].mean()
    )

def is_greater_than_obs(num):
    """Tell whether ``num`` is strictly greater than the module-level ``obs``."""
    exceeds_observed = num > obs
    return exceeds_observed

def mean(lst):
    """Return the arithmetic mean of the values in ``lst``.

    ``lst`` may be any iterable, including a one-shot iterator such as the
    result of ``filter``; it is materialised once so that it can be both
    summed and counted.

    Raises:
        ZeroDivisionError: if ``lst`` yields no values.
    """
    values = list(lst)
    return sum(values) / len(values)

# Empirical one-sided p-value: the fraction of simulated differences that
# exceed the observed one. (Averaging the exceeding values themselves, as
# before, gives a typical extreme difference, not a probability.)
exceed_count = sum(1 for diff in null_distribution if is_greater_than_obs(diff))
# float() keeps this a true division even under Python 2.
p_val = float(exceed_count) / len(null_distribution)

print("P-value={0}".format(p_val))
plt.hist(null_distribution)
plt.title("Null distribution for female mice")
plt.xlabel("Mean")
# The count axis is y; a second xlabel call would overwrite "Mean".
plt.ylabel("Frequency")

plt.show()
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt

	# Load the mouse body-weight table; the rows are split by the "Diet" column.
	DATA_URL = (
	    "https://raw.githubusercontent.com/genomicsclass/dagdata/master/"
	    "inst/extdata/femaleMiceWeights.csv"
	)
	SAMPLE_SIZE = 12        # rows drawn for each simulated group
	N_SIMULATIONS = 10000   # number of simulated differences

	population = pd.read_csv(DATA_URL)

	control = population[population["Diet"] == "chow"]
	treatment = population[population["Diet"] == "hf"]

	# Observed difference in mean body weight (treatment minus control).
	obs = treatment["Bodyweight"].mean() - control["Bodyweight"].mean()

	# Build the null distribution by repeatedly drawing two random groups from
	# the whole table and recording the difference of their mean body weights.
	# The loop body is indented under ``for`` (it was flattened before), and
	# the simulated groups get their own names so the observed ``control`` and
	# ``treatment`` frames above are not overwritten.
	# NOTE(review): the two groups are drawn independently, so the same row can
	# appear in both; confirm whether a shuffle-and-split permutation is intended.
	null_distribution = []
	for _ in range(N_SIMULATIONS):
	    # Draw order (control first, then treatment) matches the original.
	    simulated_control = population.sample(SAMPLE_SIZE)
	    simulated_treatment = population.sample(SAMPLE_SIZE)
	    null_distribution.append(
	        simulated_treatment["Bodyweight"].mean()
	        - simulated_control["Bodyweight"].mean()
	    )

	def is_greater_than_obs(num):
	    """Return True when ``num`` is strictly greater than ``obs``.

	    ``obs`` is read from the enclosing scope at call time.
	    """
	    return num > obs

	def mean(lst):
	    """Return the arithmetic mean of the values in ``lst``.

	    ``lst`` may be any iterable, including a one-shot iterator such as the
	    result of ``filter``; it is materialised once so that it can be both
	    summed and counted.

	    Raises:
	        ZeroDivisionError: if ``lst`` yields no values.
	    """
	    values = list(lst)
	    return sum(values) / len(values)

	# Empirical one-sided p-value: the fraction of simulated differences that
	# exceed the observed one. (Averaging the exceeding values themselves, as
	# before, gives a typical extreme difference, not a probability.)
	exceed_count = sum(1 for diff in null_distribution if is_greater_than_obs(diff))
	# float() keeps this a true division even under Python 2.
	p_val = float(exceed_count) / len(null_distribution)

	print("P-value={0}".format(p_val))
	plt.hist(null_distribution)
	plt.title("Null distribution for female mice")
	plt.xlabel("Mean")
	# The count axis is y; a second xlabel call would overwrite "Mean".
	plt.ylabel("Frequency")

	plt.show()