Skip to content

Instantly share code, notes, and snippets.

@timbuckley
Last active June 4, 2017 23:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timbuckley/fdefffbf3b879270c7839cf253631532 to your computer and use it in GitHub Desktop.
Save timbuckley/fdefffbf3b879270c7839cf253631532 to your computer and use it in GitHub Desktop.
Simulates doing a thousand polls at a 1.5k sample size from a population of 245 million. The most any poll was off by was 4.5%, while the average error was just 1.19%.
import random
american_adult_population = int(245e6)
# The simulated population is just the integers 0 .. 244,999,999.
# Warning: under Python 2, range() builds a real list, which is HUGE;
# under Python 3 it is a lazy sequence and costs almost nothing.
usa_array = range(american_adult_population)
# The mean of 0 .. N-1 is (N - 1) / 2, not N / 2. Dividing by 2.0 keeps the
# .5 that integer division would drop under Python 2.
actual_average = (american_adult_population - 1) / 2.0
# Default number of respondents drawn for each simulated poll.
sample_amount = 1500
def make_sample(population, k=sample_amount):
    """
    Draw one simulated poll from `population` and measure how far off it is.

    Given a population (and an optional sample amount (k)), return a tuple of:
    - the average of the sample, truncated to an int
    - the absolute difference between the sample average and real average
    - the percent difference between the sample average and real average,
      rounded to 4 decimal places

    The "real average" is the module-level `actual_average`, so the error
    figures are only meaningful when `population` is the sequence that
    constant was computed for.
    """
    # Sample from the population that was passed in; the argument used to be
    # ignored in favour of the global usa_array.
    sampling = random.sample(population, k)
    avg = average(sampling)
    return (
        int(avg),
        abs(avg - actual_average),
        round(abs((avg - actual_average) / actual_average) * 100, 4),
    )
def average(nums):
    """
    Return the arithmetic mean of `nums` as a float.

    `nums` must support len() and contain at least one number; an empty
    input raises ZeroDivisionError.
    """
    # Use the builtin sum() instead of a manual accumulation loop. The
    # float() conversion keeps true division under Python 2.
    return float(sum(nums)) / len(nums)
# Run the simulated poll 1000 times. Each entry is the
# (sample average, absolute error, percent error) tuple from make_sample.
samples = [make_sample(usa_array) for _ in range(1000)]

if __name__ == "__main__":
    # Keep only the percent-error field (third element) of every poll.
    percents_sample_is_off_by = [poll[2] for poll in samples]
    worst_percent = max(percents_sample_is_off_by)
    mean_percent = average(percents_sample_is_off_by)
    # Report the worst single-poll error and the mean error, in percent.
    print (
        worst_percent,
        mean_percent
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment