Last active
October 23, 2019 18:41
-
-
Save chaipi-chaya/e50682e66061199b50da55cb626586cb to your computer and use it in GitHub Desktop.
Illustration with Python: Central Limit Theorem | as the sample size increases, the probability that the sample mean is further from the population mean than the margin of error decreases
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import random | |
import matplotlib.pyplot as plt | |
# Step 1
## Show that as the sample size increases, the sample mean gets closer to
## the population mean.
# Build a gamma-distributed population.
shape, scale = 2., 2.  # mean = shape*scale = 4, std = sqrt(shape)*scale = 2*sqrt(2)
mu = shape * scale  # theoretical population mean
s = np.random.gamma(shape, scale, 1000000)  # population of 1,000,000 draws
# Margin of error: a sample mean further than this from mu counts as a miss.
epsilon = 0.05
# Step 2
# For each sample size, estimate the probability that the sample mean is
# further than epsilon from the population mean mu, by repeated sampling.
trials = 100  # number of independent samples drawn per sample size
# estimated probability for each sample size
proberror = []
# sample sizes, kept in step with proberror for plotting
samplesize = []
for n in range(100, 10101, 500):
    # number of trials whose sample mean missed mu by more than epsilon
    c = 0
    for _ in range(trials):
        # draw n values from the population (with replacement)
        rs = np.random.choice(s, size=n)
        # count the trial if the sample mean is outside the margin of error
        if abs(rs.mean() - mu) > epsilon:
            c += 1
    # Step 3
    # fraction of trials outside the margin of error = estimated probability
    proberror.append(c / trials)
    samplesize.append(n)
# Step 4
# Plot the estimated probability of exceeding the margin of error against
# the sample size.
plt.figure(figsize=(20, 10))
plt.plot(samplesize, proberror, marker='o')
# Label the axes so the figure can be read without the source.
plt.xlabel('sample size')
plt.ylabel('P(|sample mean - population mean| > epsilon)')
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment