# timbuckley/how-effective-sampling-is.py

## how-effective-sampling-is.py
import random

american_adult_population = int(245e6)
# The population is modelled as the integers 0 .. 244,999,999.
# Warning: on Python 2, range() builds a full list, so this array is HUGE;
# on Python 3 it is a lazy range object and costs almost no memory.
usa_array = range(american_adult_population)
# The true mean of 0 .. N-1 is (N - 1) / 2, not N / 2. Dividing by 2.0
# keeps the fractional half on Python 2 as well.
actual_average = (american_adult_population - 1) / 2.0

# Default number of items drawn per sample.
sample_amount = 1500

def make_sample(population, k=sample_amount):
    """
    Draw a random sample from ``population`` and compare its mean to the
    real mean.

    Args:
        population: sequence to draw from; it is handed to ``random.sample``.
        k: number of items to draw (defaults to ``sample_amount``).

    Returns:
        A tuple of:
        - the average of the sample, truncated to an int
        - the absolute difference between the sample average and real average
        - the percent difference between the sample average and real average,
          rounded to 4 decimal places

    The "real average" is the module-level ``actual_average``, so the two
    error figures are only meaningful when ``population`` has that mean.
    """
    # Sample from the argument rather than the module-level usa_array so the
    # function honours whatever population the caller passes in.
    sampling = random.sample(population, k)
    avg = average(sampling)
    return (
        int(avg),
        abs(avg - actual_average),
        round(abs((avg - actual_average) / actual_average) * 100, 4),
    )

def average(nums):
    """
    Return the arithmetic mean of ``nums`` as a float.

    ``nums`` must support both iteration and ``len()`` (e.g. a list or a
    range).

    Raises:
        ZeroDivisionError: if ``nums`` is empty.
    """
    # float() keeps this a true division on Python 2 as well.
    return float(sum(nums)) / len(nums)


# Run the experiment 1000 times; every entry is the 3-tuple that
# make_sample returns for one independent draw.
samples = [make_sample(usa_array) for _ in range(1000)]

if __name__ == "__main__":
    # The third field of each result is the percent error of that sample.
    percents_sample_is_off_by = [result[2] for result in samples]
    worst_percent = max(percents_sample_is_off_by)
    mean_percent = average(percents_sample_is_off_by)
    # Report the worst-case and the mean percent error across all samples.
    print(worst_percent, mean_percent)
	import random

	american_adult_population = int(245e6)
	# The population is modelled as the integers 0 .. 244,999,999.
	# Warning: on Python 2, range() builds a full list, so this array is HUGE;
	# on Python 3 it is a lazy range object and costs almost no memory.
	usa_array = range(american_adult_population)
	# The true mean of 0 .. N-1 is (N - 1) / 2, not N / 2. Dividing by 2.0
	# keeps the fractional half on Python 2 as well.
	actual_average = (american_adult_population - 1) / 2.0

	# Default number of items drawn per sample.
	sample_amount = 1500

	def make_sample(population, k=sample_amount):
		"""
		Draw a random sample from ``population`` and compare its mean to the
		real mean.

		Args:
			population: sequence to draw from; it is handed to ``random.sample``.
			k: number of items to draw (defaults to ``sample_amount``).

		Returns:
			A tuple of:
			- the average of the sample, truncated to an int
			- the absolute difference between the sample average and real average
			- the percent difference between the sample average and real average,
			  rounded to 4 decimal places

		The "real average" is the module-level ``actual_average``, so the two
		error figures are only meaningful when ``population`` has that mean.
		"""
		# Sample from the argument rather than the module-level usa_array so
		# the function honours whatever population the caller passes in.
		sampling = random.sample(population, k)
		avg = average(sampling)
		return (
			int(avg),
			abs(avg - actual_average),
			round(abs((avg - actual_average) / actual_average) * 100, 4),
		)

	def average(nums):
		"""
		Return the arithmetic mean of ``nums`` as a float.

		``nums`` must support both iteration and ``len()`` (e.g. a list or a
		range).

		Raises:
			ZeroDivisionError: if ``nums`` is empty.
		"""
		# float() keeps this a true division on Python 2 as well.
		return float(sum(nums)) / len(nums)


	# Run the experiment 1000 times; every entry is the 3-tuple that
	# make_sample returns for one independent draw.
	samples = [make_sample(usa_array) for _ in range(1000)]

	if __name__ == "__main__":
		# The third field of each result is the percent error of that sample.
		percents_sample_is_off_by = [p for _, _, p in samples]
		# Report the worst-case and the mean percent error across all samples.
		print (
			max(percents_sample_is_off_by),
			average(percents_sample_is_off_by)
		)