Skip to content

Instantly share code, notes, and snippets.

@Michael-J-Ward
Last active April 24, 2016 19:41
Show Gist options
  • Save Michael-J-Ward/2dcbe73afd64142c4c6101812004e16d to your computer and use it in GitHub Desktop.
Save Michael-J-Ward/2dcbe73afd64142c4c6101812004e16d to your computer and use it in GitHub Desktop.
Demonstrating Central Limit theorem
# Demonstrating the Central Limit Theorem for MIT's Probabilistic Systems course
import numpy as np
import matplotlib.pyplot as plt
def genTest(distribution, **kwargs):
    """Return a score-generating function backed by ``distribution``.

    Args:
        distribution: Callable that accepts the keyword arguments given in
            ``kwargs`` plus a ``size`` keyword (for example
            ``np.random.uniform``).
        **kwargs: Distribution parameters forwarded on every call.

    Returns:
        A function ``test(num_students)`` that returns the result of
        ``distribution(size=num_students, **kwargs)``. A ``size`` entry in
        ``kwargs`` is overridden by ``num_students``.
    """
    def test(num_students):
        # Build a fresh argument dict for each call rather than writing
        # ``size`` into the captured ``kwargs``, so the closure carries no
        # state from one call to the next.
        params = dict(kwargs, size=num_students)
        return distribution(**params)
    return test
# Alternative configuration: hard and easy tests drawn from normal
# distributions. Uncomment these (and comment out the uniform pair below)
# to repeat the experiments with normally distributed scores.
# HARD_TEST = genTest(np.random.normal, loc=25, scale=8)
# EASY_TEST = genTest(np.random.normal, loc=75, scale=8)
# Active configuration: hard and easy tests drawn from uniform distributions
# (hard test: low=5, high=25; easy test: low=75, high=95).
HARD_TEST = genTest(np.random.uniform, low=5, high=25)
EASY_TEST = genTest(np.random.uniform, low=75, high=95)
# Number of students in each simulated class.
CLASS_SIZE = 50
def randomTest():
    """Pick one of the two test generators at random.

    Returns:
        ``HARD_TEST`` when a draw from ``np.random.random()`` is greater
        than 0.5, otherwise ``EASY_TEST``.
    """
    if np.random.random() > 0.5:
        return HARD_TEST
    return EASY_TEST
# Experiment 1: randomly choose a test difficulty, then give everyone in the
# class that same test. Notice that the distribution of the class average is
# extremely bimodal (NOT normal): the scores within a class all share one
# test, so they are not independent draws.
def experiment1(num_samples):
    """Simulate classes in which every student takes the same random test.

    Args:
        num_samples: Number of classes to simulate.

    Returns:
        A list of ``num_samples`` class-average scores, one per simulated
        class of ``CLASS_SIZE`` students.
    """
    means = []
    for _ in range(num_samples):
        # One difficulty is drawn per class and shared by all its students.
        test = randomTest()
        scores = test(CLASS_SIZE)
        means.append(scores.mean())
    return means
# Run experiment 1 for 1000 simulated classes and show a 100-bin histogram
# of the resulting class averages.
means1 = experiment1(1000)
plt.hist(means1, bins=100)
plt.show()
# Experiment 2: give half of the class the easy test and the other half the
# hard test.
def experiment2(num_samples):
    """Simulate classes split evenly between the easy and the hard test.

    Args:
        num_samples: Number of classes to simulate.

    Returns:
        A list of ``num_samples`` class-average scores. Each class is made
        of ``CLASS_SIZE // 2`` easy-test scores and ``CLASS_SIZE // 2``
        hard-test scores.
    """
    # Floor division: with an odd CLASS_SIZE the leftover student is dropped.
    num_students = CLASS_SIZE // 2
    means = []
    for _ in range(num_samples):
        easy_scores = EASY_TEST(num_students)
        hard_scores = HARD_TEST(num_students)
        scores = np.concatenate([easy_scores, hard_scores])
        means.append(scores.mean())
    return means
# Run experiment 2 for 1000 simulated classes and show a 100-bin histogram
# of the resulting class averages.
means2 = experiment2(1000)
plt.hist(means2, bins=100)
plt.show()
# Experiment 3: each student is randomly and independently given either an
# easy or a hard exam.
def randomScore():
    """Draw a single student's score from a randomly chosen test.

    Returns:
        The result of calling the generator chosen by ``randomTest()`` with
        ``num_students=1``.
    """
    return randomTest()(1)
def experiment3(num_samples):
    """Simulate classes where each student independently gets a random test.

    Args:
        num_samples: Number of classes to simulate.

    Returns:
        A list of ``num_samples`` class-average scores, each computed over
        ``CLASS_SIZE`` independent calls to ``randomScore()``.
    """
    means = []
    for _ in range(num_samples):
        # A separate throwaway name in the comprehension avoids shadowing
        # the outer loop variable.
        scores = np.array([randomScore() for _student in range(CLASS_SIZE)])
        means.append(scores.mean())
    return means
# Run experiment 3 for 1000 simulated classes and show a 100-bin histogram
# of the resulting class averages.
means3 = experiment3(1000)
plt.hist(means3, bins=100)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment