Skip to content

Instantly share code, notes, and snippets.

@chaipi-chaya
Last active October 23, 2019 18:41
Show Gist options
  • Save chaipi-chaya/e50682e66061199b50da55cb626586cb to your computer and use it in GitHub Desktop.
Save chaipi-chaya/e50682e66061199b50da55cb626586cb to your computer and use it in GitHub Desktop.
Illustration with Python: Central Limit Theorem | as the sample size increases, the probability that the sample mean is further from the population mean than the error decreases
import numpy as np
import random
import matplotlib.pyplot as plt
# Step 1
## Show that as the sample size increases, the sample mean gets closer to the
## population mean: estimate, for each sample size, the probability that the
## sample mean misses the population mean by more than a fixed margin.
# Build a gamma-distributed population.
shape, scale = 2., 2.  # mean = shape*scale = 4, std = sqrt(shape)*scale = 2*sqrt(2)
mu = shape*scale  # theoretical population mean
s = np.random.gamma(shape, scale, 1000000)
# Margin of error: a sample mean further than this from mu counts as a miss.
epsilon = 0.05
# Number of repeated samplings per sample size. Named once so the loop bound
# and the probability denominator cannot drift apart.
trials = 100

# Step 2
# Estimated miss probability for each sample size.
proberror = []
# Sample sizes, kept in step with proberror for plotting.
samplesize = []
# For each sample size (100, 600, ..., 10100).
for n in range(100, 10101, 500):
    # Count of trials whose sample mean missed mu by more than epsilon.
    c = 0
    for _ in range(trials):
        # Draw n values with replacement (the default for np.random.choice).
        # Sampling and averaging stay inside NumPy instead of looping over the
        # array element by element in Python, and the same np.random generator
        # that built the population is used, so a single np.random.seed call
        # makes the whole run reproducible.
        rs = np.random.choice(s, size=n)
        # Sample mean.
        mean = rs.mean()
        # Count the trial if the deviation exceeds the margin of error.
        if abs(mean - mu) > epsilon:
            c += 1
    # Step 3
    # Fraction of trials that missed = estimated probability for this n.
    proberror.append(c/trials)
    # Save the sample size for plotting.
    samplesize.append(n)

# Step 4
# Set the figure size.
plt.figure(figsize=(20,10))
# Plot the miss probability against the sample size.
plt.plot(samplesize, proberror, marker='o')
# Show the plot.
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment