Skip to content

Instantly share code, notes, and snippets.

@stevenobadja
Created August 30, 2017 07:23
Show Gist options
  • Save stevenobadja/070c8b1028cef18e01ee09d601b9402f to your computer and use it in GitHub Desktop.
Save stevenobadja/070c8b1028cef18e01ee09d601b9402f to your computer and use it in GitHub Desktop.
CLT
import numpy as np
import pandas as pd
import scipy
from scipy.stats import ttest_ind
import matplotlib.pyplot as plt
%matplotlib inline
# Two binomial populations of 10000 draws each (10 trials per draw) with
# success probabilities 0.2 and 0.5.
pop1 = np.random.binomial(n=10, p=0.2, size=10000)
pop2 = np.random.binomial(n=10, p=0.5, size=10000)

# Compute each summary statistic once; the plot markers and the printout
# below both reuse these values.
pop1_mean, pop1_std = pop1.mean(), pop1.std()
pop2_mean, pop2_std = pop2.mean(), pop2.std()

# Overlaid, semi-transparent histograms of the two populations.
for values, name in ((pop1, 'Population 1'), (pop2, 'Population 2')):
    plt.hist(values, alpha=0.5, label=name)
plt.legend(loc='upper right')

# Solid line at each mean, dashed lines one standard deviation either side
# (red for population 1, green for population 2).
for centre, spread, colour in ((pop1_mean, pop1_std, 'r'),
                               (pop2_mean, pop2_std, 'g')):
    plt.axvline(centre, color=colour, linestyle='solid', linewidth=2)
    plt.axvline(centre + spread, color=colour, linestyle='dashed', linewidth=2)
    plt.axvline(centre - spread, color=colour, linestyle='dashed', linewidth=2)
plt.show()

print('Population 1 Mean: {}'.format(pop1_mean))
print('Population 2 Mean: {}'.format(pop2_mean))
print('Population 1 Standard Deviation: {}'.format(pop1_std))
print('Population 2 Standard Deviation: {}'.format(pop2_std))
# Draw 100 values (with replacement) from each population.
sample1 = np.random.choice(pop1, size=100, replace=True)
sample2 = np.random.choice(pop2, size=100, replace=True)

# Compute each summary statistic once for reuse in the plot and printout.
sample1_mean, sample1_std = sample1.mean(), sample1.std()
sample2_mean, sample2_std = sample2.mean(), sample2.std()

# Overlaid, semi-transparent histograms of the two samples.
for values, name in ((sample1, 'sample 1'), (sample2, 'sample 2')):
    plt.hist(values, alpha=0.5, label=name)
plt.legend(loc='upper right')

# Solid line at each mean, dashed lines one standard deviation either side
# (red for sample 1, green for sample 2).
for centre, spread, colour in ((sample1_mean, sample1_std, 'r'),
                               (sample2_mean, sample2_std, 'g')):
    plt.axvline(centre, color=colour, linestyle='solid', linewidth=2)
    plt.axvline(centre + spread, color=colour, linestyle='dashed', linewidth=2)
    plt.axvline(centre - spread, color=colour, linestyle='dashed', linewidth=2)
plt.show()

print('Sample 1 Mean: {}'.format(sample1_mean))
print('Sample 2 Mean: {}'.format(sample2_mean))
print('Sample 1 Standard Deviation: {}'.format(sample1_std))
print('Sample 2 Standard Deviation: {}'.format(sample2_std))
# Draw 1000 values (with replacement) from each population.
sample3 = np.random.choice(pop1, size=1000, replace=True)
sample4 = np.random.choice(pop2, size=1000, replace=True)

# Compute each summary statistic once for reuse in the plot and printout.
sample3_mean, sample3_std = sample3.mean(), sample3.std()
sample4_mean, sample4_std = sample4.mean(), sample4.std()

# Overlaid, semi-transparent histograms of the two samples.
for values, name in ((sample3, 'sample 3'), (sample4, 'sample 4')):
    plt.hist(values, alpha=0.5, label=name)
plt.legend(loc='upper right')

# Solid line at each mean, dashed lines one standard deviation either side
# (red for sample 3, green for sample 4).
for centre, spread, colour in ((sample3_mean, sample3_std, 'r'),
                               (sample4_mean, sample4_std, 'g')):
    plt.axvline(centre, color=colour, linestyle='solid', linewidth=2)
    plt.axvline(centre + spread, color=colour, linestyle='dashed', linewidth=2)
    plt.axvline(centre - spread, color=colour, linestyle='dashed', linewidth=2)
plt.show()

print('Sample 3 Mean: {}'.format(sample3_mean))
print('Sample 4 Mean: {}'.format(sample4_mean))
print('Sample 3 Standard Deviation: {}'.format(sample3_std))
print('Sample 4 Standard Deviation: {}'.format(sample4_std))
# Draw 20 values (with replacement) from each population.
sample5 = np.random.choice(pop1, size=20, replace=True)
sample6 = np.random.choice(pop2, size=20, replace=True)

# Compute each summary statistic once for reuse in the plot and printout.
sample5_mean, sample5_std = sample5.mean(), sample5.std()
sample6_mean, sample6_std = sample6.mean(), sample6.std()

# Overlaid, semi-transparent histograms of the two samples.
for values, name in ((sample5, 'sample 5'), (sample6, 'sample 6')):
    plt.hist(values, alpha=0.5, label=name)
plt.legend(loc='upper right')

# Solid line at each mean, dashed lines one standard deviation either side
# (red for sample 5, green for sample 6).
for centre, spread, colour in ((sample5_mean, sample5_std, 'r'),
                               (sample6_mean, sample6_std, 'g')):
    plt.axvline(centre, color=colour, linestyle='solid', linewidth=2)
    plt.axvline(centre + spread, color=colour, linestyle='dashed', linewidth=2)
    plt.axvline(centre - spread, color=colour, linestyle='dashed', linewidth=2)
plt.show()

print('Sample 5 Mean: {}'.format(sample5_mean))
print('Sample 6 Mean: {}'.format(sample6_mean))
print('Sample 5 Standard Deviation: {}'.format(sample5_std))
print('Sample 6 Standard Deviation: {}'.format(sample6_std))

# Author's written observation comparing the 100-, 1000- and 20-draw samples.
print('Range appears similar, however number of frequecy differs due to sample size and lower samples appear to have more overlapping points')
# Rebuild the populations: 10000 binomial draws each (10 trials per draw),
# now with success probabilities 0.3 and 0.5.
pop1 = np.random.binomial(n=10, p=0.3, size=10000)
pop2 = np.random.binomial(n=10, p=0.5, size=10000)

# Compute each summary statistic once; the plot markers and the printout
# below both reuse these values.
pop1_mean, pop1_std = pop1.mean(), pop1.std()
pop2_mean, pop2_std = pop2.mean(), pop2.std()

# Overlaid, semi-transparent histograms of the two populations.
for values, name in ((pop1, 'Population 1'), (pop2, 'Population 2')):
    plt.hist(values, alpha=0.5, label=name)
plt.legend(loc='upper right')

# Solid line at each mean, dashed lines one standard deviation either side
# (red for population 1, green for population 2).
for centre, spread, colour in ((pop1_mean, pop1_std, 'r'),
                               (pop2_mean, pop2_std, 'g')):
    plt.axvline(centre, color=colour, linestyle='solid', linewidth=2)
    plt.axvline(centre + spread, color=colour, linestyle='dashed', linewidth=2)
    plt.axvline(centre - spread, color=colour, linestyle='dashed', linewidth=2)
plt.show()

print('Population 1 Mean: {}'.format(pop1_mean))
print('Population 2 Mean: {}'.format(pop2_mean))
print('Population 1 Standard Deviation: {}'.format(pop1_std))
print('Population 2 Standard Deviation: {}'.format(pop2_std))
# Draw 100 values (with replacement) from each of the current populations.
sample1 = np.random.choice(pop1, size=100, replace=True)
sample2 = np.random.choice(pop2, size=100, replace=True)

# Compute each summary statistic once for reuse in the plot and printout.
sample1_mean, sample1_std = sample1.mean(), sample1.std()
sample2_mean, sample2_std = sample2.mean(), sample2.std()

# Overlaid, semi-transparent histograms of the two samples.
for values, name in ((sample1, 'sample 1'), (sample2, 'sample 2')):
    plt.hist(values, alpha=0.5, label=name)
plt.legend(loc='upper right')

# Solid line at each mean, dashed lines one standard deviation either side
# (red for sample 1, green for sample 2).
for centre, spread, colour in ((sample1_mean, sample1_std, 'r'),
                               (sample2_mean, sample2_std, 'g')):
    plt.axvline(centre, color=colour, linestyle='solid', linewidth=2)
    plt.axvline(centre + spread, color=colour, linestyle='dashed', linewidth=2)
    plt.axvline(centre - spread, color=colour, linestyle='dashed', linewidth=2)
plt.show()

print('Sample 1 Mean: {}'.format(sample1_mean))
print('Sample 2 Mean: {}'.format(sample2_mean))
print('Sample 1 Standard Deviation: {}'.format(sample1_std))
print('Sample 2 Standard Deviation: {}'.format(sample2_std))
# Hand-computed t-statistic for the difference in sample means, followed by
# SciPy's unequal-variance t-test on the same data as a cross-check.

# Difference between the two sample means (sample 2 minus sample 1).
diff = sample2.mean() - sample1.mean()
print('Sample difference of: {}'.format(diff))

# Standard error of the difference: sqrt(s1^2 / n1 + s2^2 / n2).
# ddof=1 gives the sample standard deviation, matching what
# ttest_ind(..., equal_var=False) uses internally. NumPy's default ddof=0
# shrinks the standard error and makes the hand-computed t-value disagree
# with SciPy's reported statistic.
size = np.array([len(sample1), len(sample2)])
sd = np.array([sample1.std(ddof=1), sample2.std(ddof=1)])
diff_se = np.sqrt(np.sum(sd ** 2 / size))
print('T-Value: {}'.format(diff / diff_se))

# equal_var=False: do not assume the two populations share a variance.
print(ttest_ind(sample2, sample1, equal_var=False))
# Rebuild the populations: 10000 binomial draws each (10 trials per draw),
# now with success probabilities 0.4 and 0.5.
pop1 = np.random.binomial(n=10, p=0.4, size=10000)
pop2 = np.random.binomial(n=10, p=0.5, size=10000)

# Compute each summary statistic once; the plot markers and the printout
# below both reuse these values.
pop1_mean, pop1_std = pop1.mean(), pop1.std()
pop2_mean, pop2_std = pop2.mean(), pop2.std()

# Overlaid, semi-transparent histograms of the two populations.
for values, name in ((pop1, 'Population 1'), (pop2, 'Population 2')):
    plt.hist(values, alpha=0.5, label=name)
plt.legend(loc='upper right')

# Solid line at each mean, dashed lines one standard deviation either side
# (red for population 1, green for population 2).
for centre, spread, colour in ((pop1_mean, pop1_std, 'r'),
                               (pop2_mean, pop2_std, 'g')):
    plt.axvline(centre, color=colour, linestyle='solid', linewidth=2)
    plt.axvline(centre + spread, color=colour, linestyle='dashed', linewidth=2)
    plt.axvline(centre - spread, color=colour, linestyle='dashed', linewidth=2)
plt.show()

print('Population 1 Mean: {}'.format(pop1_mean))
print('Population 2 Mean: {}'.format(pop2_mean))
print('Population 1 Standard Deviation: {}'.format(pop1_std))
print('Population 2 Standard Deviation: {}'.format(pop2_std))
# Draw 100 values (with replacement) from each of the current populations.
sample1 = np.random.choice(pop1, size=100, replace=True)
sample2 = np.random.choice(pop2, size=100, replace=True)

# Compute each summary statistic once for reuse in the plot and printout.
sample1_mean, sample1_std = sample1.mean(), sample1.std()
sample2_mean, sample2_std = sample2.mean(), sample2.std()

# Overlaid, semi-transparent histograms of the two samples.
for values, name in ((sample1, 'sample 1'), (sample2, 'sample 2')):
    plt.hist(values, alpha=0.5, label=name)
plt.legend(loc='upper right')

# Solid line at each mean, dashed lines one standard deviation either side
# (red for sample 1, green for sample 2).
for centre, spread, colour in ((sample1_mean, sample1_std, 'r'),
                               (sample2_mean, sample2_std, 'g')):
    plt.axvline(centre, color=colour, linestyle='solid', linewidth=2)
    plt.axvline(centre + spread, color=colour, linestyle='dashed', linewidth=2)
    plt.axvline(centre - spread, color=colour, linestyle='dashed', linewidth=2)
plt.show()

print('Sample 1 Mean: {}'.format(sample1_mean))
print('Sample 2 Mean: {}'.format(sample2_mean))
print('Sample 1 Standard Deviation: {}'.format(sample1_std))
print('Sample 2 Standard Deviation: {}'.format(sample2_std))
# Hand-computed t-statistic for the difference in sample means, followed by
# SciPy's unequal-variance t-test on the same data as a cross-check.

# Difference between the two sample means (sample 2 minus sample 1).
diff = sample2.mean() - sample1.mean()
print('Sample difference of: {}'.format(diff))

# Standard error of the difference: sqrt(s1^2 / n1 + s2^2 / n2).
# ddof=1 gives the sample standard deviation, matching what
# ttest_ind(..., equal_var=False) uses internally. NumPy's default ddof=0
# shrinks the standard error and makes the hand-computed t-value disagree
# with SciPy's reported statistic.
size = np.array([len(sample1), len(sample2)])
sd = np.array([sample1.std(ddof=1), sample2.std(ddof=1)])
diff_se = np.sqrt(np.sum(sd ** 2 / size))
print('T-Value: {}'.format(diff / diff_se))

# equal_var=False: do not assume the two populations share a variance.
print(ttest_ind(sample2, sample1, equal_var=False))

# Author's written observation for this pair of populations.
print('Mean and Standard Deviation are closer together, less difference in samples')
# Rebuild the populations from a Gumbel distribution instead: 10000 draws
# each, both with location 10, with scales 0.4 and 0.5.
pop1 = np.random.gumbel(loc=10, scale=0.4, size=10000)
pop2 = np.random.gumbel(loc=10, scale=0.5, size=10000)

# Compute each summary statistic once; the plot markers and the printout
# below both reuse these values.
pop1_mean, pop1_std = pop1.mean(), pop1.std()
pop2_mean, pop2_std = pop2.mean(), pop2.std()

# Overlaid, semi-transparent histograms of the two populations.
for values, name in ((pop1, 'Population 1'), (pop2, 'Population 2')):
    plt.hist(values, alpha=0.5, label=name)
plt.legend(loc='upper right')

# Solid line at each mean, dashed lines one standard deviation either side
# (red for population 1, green for population 2).
for centre, spread, colour in ((pop1_mean, pop1_std, 'r'),
                               (pop2_mean, pop2_std, 'g')):
    plt.axvline(centre, color=colour, linestyle='solid', linewidth=2)
    plt.axvline(centre + spread, color=colour, linestyle='dashed', linewidth=2)
    plt.axvline(centre - spread, color=colour, linestyle='dashed', linewidth=2)
plt.show()

print('Population 1 Mean: {}'.format(pop1_mean))
print('Population 2 Mean: {}'.format(pop2_mean))
print('Population 1 Standard Deviation: {}'.format(pop1_std))
print('Population 2 Standard Deviation: {}'.format(pop2_std))
# Draw 100 values (with replacement) from each of the current populations.
sample1 = np.random.choice(pop1, size=100, replace=True)
sample2 = np.random.choice(pop2, size=100, replace=True)

# Compute each summary statistic once for reuse in the plot and printout.
sample1_mean, sample1_std = sample1.mean(), sample1.std()
sample2_mean, sample2_std = sample2.mean(), sample2.std()

# Overlaid, semi-transparent histograms of the two samples.
for values, name in ((sample1, 'sample 1'), (sample2, 'sample 2')):
    plt.hist(values, alpha=0.5, label=name)
plt.legend(loc='upper right')

# Solid line at each mean, dashed lines one standard deviation either side
# (red for sample 1, green for sample 2).
for centre, spread, colour in ((sample1_mean, sample1_std, 'r'),
                               (sample2_mean, sample2_std, 'g')):
    plt.axvline(centre, color=colour, linestyle='solid', linewidth=2)
    plt.axvline(centre + spread, color=colour, linestyle='dashed', linewidth=2)
    plt.axvline(centre - spread, color=colour, linestyle='dashed', linewidth=2)
plt.show()

print('Sample 1 Mean: {}'.format(sample1_mean))
print('Sample 2 Mean: {}'.format(sample2_mean))
print('Sample 1 Standard Deviation: {}'.format(sample1_std))
print('Sample 2 Standard Deviation: {}'.format(sample2_std))
# Hand-computed t-statistic for the difference in sample means, followed by
# SciPy's unequal-variance t-test on the same data as a cross-check.

# Difference between the two sample means (sample 2 minus sample 1).
diff = sample2.mean() - sample1.mean()
print('Sample difference of: {}'.format(diff))

# Standard error of the difference: sqrt(s1^2 / n1 + s2^2 / n2).
# ddof=1 gives the sample standard deviation, matching what
# ttest_ind(..., equal_var=False) uses internally. NumPy's default ddof=0
# shrinks the standard error and makes the hand-computed t-value disagree
# with SciPy's reported statistic.
size = np.array([len(sample1), len(sample2)])
sd = np.array([sample1.std(ddof=1), sample2.std(ddof=1)])
diff_se = np.sqrt(np.sum(sd ** 2 / size))
print('T-Value: {}'.format(diff / diff_se))

# equal_var=False: do not assume the two populations share a variance.
print(ttest_ind(sample2, sample1, equal_var=False))

# Author's written conclusion for the Gumbel-distributed populations.
print('Yes, Mean values in sample still represents the population')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment