# Michael-J-Ward/central_limit_demo.py

## central_limit_demo.py
# Demonstrating the Central Limit Theorem for MIT's Probabilistic Systems course

import numpy as np
import matplotlib.pyplot as plt


def genTest(distribution, **kwargs):
	"""Build a score sampler bound to ``distribution`` and its keyword arguments.

	The returned callable takes the number of students, stores it under the
	``size`` keyword (overriding any ``size`` supplied here) and returns
	whatever ``distribution`` produces for the combined keyword arguments.
	"""
	def draw_scores(num_students):
		# The requested sample count always travels as the `size` keyword.
		kwargs.update(size=num_students)
		return distribution(**kwargs)
	return draw_scores
# hard and easy test taken from normal distribution
# HARD_TEST = genTest(np.random.normal, loc=25, scale=8)
# EASY_TEST = genTest(np.random.normal, loc=75, scale=8)

# Hard and easy tests drawn from uniform distributions: each generator
# forwards low/high (plus the requested size) to np.random.uniform.
HARD_TEST = genTest(np.random.uniform, low=5, high=25)
EASY_TEST = genTest(np.random.uniform, low=75, high=95)
# Number of students per simulated class; read by the experiment functions.
CLASS_SIZE = 50

def randomTest():
	"""Pick one of the two test generators using a single random draw.

	Returns HARD_TEST when the draw from np.random.random() exceeds 0.5,
	otherwise EASY_TEST.
	"""
	picked_hard = np.random.random() > 0.5
	return HARD_TEST if picked_hard else EASY_TEST

# Randomly choose test difficulty and then give everyone that test
# notice that the distribution of the test average is extremely bi-modal (NOT NORMAL)
def experiment1(num_samples):
	"""Simulate classes where the whole class sits one randomly chosen test.

	For each of ``num_samples`` classes a test is picked with randomTest(),
	CLASS_SIZE scores are drawn from it, and the class mean is recorded.

	Returns a list of ``num_samples`` class means.
	"""
	# randomTest() is evaluated once per class, so every student in a given
	# class shares the same difficulty.
	return [randomTest()(CLASS_SIZE).mean() for _ in range(num_samples)]

# Run experiment 1 for 1000 simulated classes and plot the class means as a
# 100-bin histogram.
means1 = experiment1(1000)
plt.hist(means1, bins=100)
# Display the figure.
plt.show()

# Give half the easy test and half the hard test
def experiment2(num_samples):
	"""Simulate classes in which half the students sit each test.

	For each of ``num_samples`` classes, one group draws scores from
	EASY_TEST and the other from HARD_TEST; the two score arrays are
	concatenated and the overall class mean is recorded.

	Returns a list of ``num_samples`` class means.
	"""
	# Split so the two groups always add up to CLASS_SIZE. Using
	# int(CLASS_SIZE/2) for both halves dropped one student whenever
	# CLASS_SIZE was odd; the extra student now joins the hard group.
	num_easy = CLASS_SIZE // 2
	num_hard = CLASS_SIZE - num_easy
	means = []
	for _ in range(num_samples):
		# Easy scores are drawn before hard scores, as in the original order.
		easy_scores = EASY_TEST(num_easy)
		hard_scores = HARD_TEST(num_hard)
		scores = np.concatenate([easy_scores, hard_scores])
		means.append(scores.mean())
	return means


# Run experiment 2 for 1000 simulated classes and plot the class means as a
# 100-bin histogram.
means2 = experiment2(1000)
plt.hist(means2, bins=100)
# Display the figure.
plt.show()

# Students randomly and independently given either an easy or a hard exam
def randomScore():
	"""Draw one student's score from a randomly chosen test.

	Returns whatever the generator picked by randomTest() yields when asked
	for a sample of size 1.
	"""
	chosen_test = randomTest()
	return chosen_test(1)

def experiment3(num_samples):
	"""Simulate classes where each student independently gets a random test.

	For each of ``num_samples`` classes, randomScore() is called CLASS_SIZE
	times, the results are collected into one array, and its mean is
	recorded.

	Returns a list of ``num_samples`` class means.
	"""
	return [
		np.array([randomScore() for _ in range(CLASS_SIZE)]).mean()
		for _ in range(num_samples)
	]

# Run experiment 3 for 1000 simulated classes and plot the class means as a
# 100-bin histogram.
means3 = experiment3(1000)
plt.hist(means3, bins=100)
# Display the figure.
plt.show()
	# Demonstrating the Central Limit Theorem for MIT's Probabilistic Systems course
# NOTE(review): this section repeats the definitions and experiments found
# earlier in this file. Its indentation had been flattened to a single tab,
# which made the module unparseable; the block structure is restored here.
# Consider removing one of the two copies.

import numpy as np
import matplotlib.pyplot as plt


def genTest(distribution,**kwargs):
	"""Returns a score generating function from the given distribution.

	The returned function takes the number of students, passes it to
	``distribution`` as the ``size`` keyword together with ``kwargs``, and
	returns the result.
	"""
	def test(num_students):
		kwargs['size'] = num_students
		return distribution(**kwargs)
	return test
# hard and easy test taken from normal distribution
# HARD_TEST = genTest(np.random.normal, loc=25, scale=8)
# EASY_TEST = genTest(np.random.normal, loc=75, scale=8)

# hard and easy test taken from uniform distribution
HARD_TEST = genTest(np.random.uniform, low=5, high=25)
EASY_TEST = genTest(np.random.uniform, low=75, high=95)
# Number of students per simulated class.
CLASS_SIZE = 50

def randomTest():
	"""Return HARD_TEST or EASY_TEST, chosen by one np.random.random() draw."""
	if np.random.random() > 0.5:
		return HARD_TEST
	else:
		return EASY_TEST

# Randomly choose test difficulty and then give everyone that test
# notice that the distribution of the test average is extremely bi-modal (NOT NORMAL)
def experiment1(num_samples):
	"""Return ``num_samples`` class means, one randomly chosen test per class."""
	means = []
	for i in range(num_samples):
		test = randomTest()
		scores = test(CLASS_SIZE)
		means.append( scores.mean())
	return means

means1 = experiment1(1000)
plt.hist(means1, bins=100)
plt.show()

# Give half the easy test and half the hard test
def experiment2(num_samples):
	"""Return ``num_samples`` class means with the class split across both tests."""
	# Split so the two groups always add up to CLASS_SIZE; using
	# int(CLASS_SIZE/2) for both halves dropped a student for odd sizes.
	num_easy = CLASS_SIZE // 2
	num_hard = CLASS_SIZE - num_easy
	means = []
	for i in range(num_samples):
		easy_scores = EASY_TEST(num_easy)
		hard_scores = HARD_TEST(num_hard)
		scores = np.concatenate([easy_scores, hard_scores])
		means.append(scores.mean())
	return means


means2 = experiment2(1000)
plt.hist(means2, bins=100)
plt.show()

# Students randomly and independently given either an easy or a hard exam
def randomScore():
	"""Return a size-1 sample from a randomly chosen test."""
	return randomTest()(1)

def experiment3(num_samples):
	"""Return ``num_samples`` class means, one random test per student."""
	means = []
	for i in range(num_samples):
		scores = np.array([randomScore() for i in range(CLASS_SIZE)])
		means.append(scores.mean())
	return means

means3 = experiment3(1000)
plt.hist(means3, bins=100)
plt.show()