# chaipi-chaya/sampleSizeIncreaseLine.py

## sampleSizeIncreaseLine.py
import numpy as np
import random
import matplotlib.pyplot as plt

# Step 1
## Show that as the sample size increases, the mean of a sample gets close to
## the population mean.
# Parameters of the gamma distribution used as the population.
shape, scale = 2., 2.  # mean=4, std=2*sqrt(2)
mu = shape * scale  # theoretical mean of the gamma distribution
# margin of error
epsilon = 0.05
# number of values drawn to build the population
population_size = 1000000
# number of repeated samples drawn for every sample size
trials = 100


def error_probability(population, mu, epsilon, n, trials=100):
    """Estimate how often a sample mean misses ``mu`` by more than ``epsilon``.

    Draws ``trials`` independent samples of ``n`` values each (with
    replacement) from ``population`` and returns the fraction of samples
    whose mean differs from ``mu`` by strictly more than ``epsilon``.

    Args:
        population: 1-D array-like of numbers to sample from.
        mu: reference mean the sample means are compared against.
        epsilon: margin of error; only differences strictly larger count.
        n: number of values in each sample (must be >= 1).
        trials: number of samples to draw (must be >= 1).

    Returns:
        A float in [0, 1]: the share of samples outside the margin.

    Raises:
        ValueError: if ``n`` or ``trials`` is smaller than 1.
    """
    if n < 1 or trials < 1:
        raise ValueError("n and trials must both be at least 1")
    # One row per trial; drawing everything in a single call keeps the work
    # inside NumPy instead of a Python-level loop over individual values.
    samples = np.random.choice(population, size=(trials, n))
    sample_means = samples.mean(axis=1)
    misses = np.count_nonzero(np.abs(sample_means - mu) > epsilon)
    return float(misses) / trials


def simulate(population, mu, epsilon, sample_sizes, trials=100):
    """Return the estimated error probability for each size in ``sample_sizes``.

    The result is a list aligned with ``sample_sizes``; each entry is the
    value of :func:`error_probability` for that sample size.
    """
    return [
        error_probability(population, mu, epsilon, n, trials)
        for n in sample_sizes
    ]


def main():
    """Run the simulation and plot error probability against sample size."""
    # Build the gamma-distributed population.
    s = np.random.gamma(shape, scale, population_size)

    # Step 2 and Step 3
    # Sample sizes to evaluate and the error probability of each one.
    samplesize = list(range(100, 10101, 500))
    proberror = simulate(s, mu, epsilon, samplesize, trials)

    # Step 4
    # set figure size.
    plt.figure(figsize=(20, 10))
    # plot each probability.
    plt.plot(samplesize, proberror, marker='o')
    plt.xlabel('sample size')
    plt.ylabel('P(|sample mean - mu| > epsilon)')
    # show plot.
    plt.show()


if __name__ == "__main__":
    main()
	import numpy as np
	import random
	import matplotlib.pyplot as plt

	# Step 1
	## Show that as the sample size increases, the mean of a sample gets close
	## to the population mean.
	# Build a gamma distribution to serve as the population.
	shape, scale = 2., 2. # mean=4, std=2*sqrt(2)
	mu = shape*scale # theoretical mean of the gamma distribution
	s = np.random.gamma(shape, scale, 1000000)
	# margin of error
	epsilon = 0.05
	# number of repeated samples drawn for every sample size
	trials = 100

	# Step 2
	# error probability estimated for each sample size
	proberror = []
	# sample sizes, kept for plotting
	samplesize = []

	# for each sample size
	for n in range(100,10101,500):
		# count of samples whose mean falls outside the margin of error
		c = 0
		for _ in range(0,trials):
			# draw n values from the population, with replacement
			rs = random.choices(s, k=n)
			# calculate the sample mean
			mean = sum(rs)/len(rs)
			# count the sample if its mean is off by more than epsilon
			if abs(mean - mu) > epsilon:
				c += 1
		# Step 3
		# fraction of the trials that missed the margin
		proberror.append(c/trials)
		# save sample size for plotting
		samplesize.append(n)

	# Step 4
	# set figure size.
	plt.figure(figsize=(20,10))
	# plot each probability.
	plt.plot(samplesize,proberror, marker='o')
	# show plot.
	plt.show()