trjordan/gist:2721175

## gistfile1.py
import cProfile
import sys
import math
import numpy
import random

def mean(values):
    """Return the arithmetic mean of ``values``, or None if it is empty.

    The division is always carried out in floating point, so a sequence of
    ints is not truncated by Python 2's integer division.

    Args:
        values: A sized sequence of numbers (e.g. a list).

    Returns:
        The mean as a float, or None when ``values`` is empty.
    """
    if not values:
        return None
    # float() forces true division even when every element is an int.
    return sum(values) / float(len(values))

def adaptive_confidence_interval(values, iterations=1000, alpha=0.05):
    """Bootstrap a confidence interval using as few iterations as needed.

    Starting at 10 bootstrap iterations, NUM_TRIES independent intervals are
    computed with confidence_interval() and their widths compared. While the
    widths disagree by more than THRESHOLD (relative to their mean width)
    and the iteration budget is not exhausted, the iteration count is
    doubled, capped at ``iterations``, and the batch is recomputed.

    Args:
        values: Sample to bootstrap; passed through to confidence_interval().
        iterations: Upper bound on the bootstrap iterations per interval.
        alpha: Significance level; passed through to confidence_interval().

    Returns:
        The dict returned by confidence_interval() for the first interval
        of the final batch.
    """
    NUM_TRIES = 4
    THRESHOLD = 0.1
    cur_iterations = 10
    while True:
        cis = [confidence_interval(values, iterations=cur_iterations,
                                   alpha=alpha)
               for _ in range(NUM_TRIES)]
        widths = [ci['upper'] - ci['lower'] for ci in cis]
        spread = max(widths) - min(widths)
        mean_width = sum(widths) / float(len(widths))
        # Compare with a multiplication instead of spread / mean_width so
        # that degenerate, zero-width intervals (e.g. a constant sample)
        # count as converged rather than raising ZeroDivisionError.
        converged = spread <= THRESHOLD * mean_width
        if converged or cur_iterations >= iterations:
            # Just pick one and return it
            return cis[0]
        # Double the effort, but never exceed the caller's budget.
        cur_iterations = min(cur_iterations * 2, iterations)


def confidence_interval(values, iterations=1000, alpha=0.05):
    """Return a bootstrap (pivotal) confidence interval of the mean.

    ``values`` is resampled with replacement ``iterations`` times, each
    resample having the same size as ``values``. The sorted resample means
    are then reflected around the sample mean to form the interval.

    Args:
        values: Non-empty, indexable sequence of numbers.
        iterations: Number of bootstrap resamples to draw.
        alpha: Significance level (0.05 gives a 95% interval).

    Returns:
        A dict with keys 'lower' and 'upper' (interval bounds), 'value'
        (the sample mean) and 'num' (the sample size).

    Raises:
        ValueError: If ``values`` is empty.
    """
    n = len(values)  # Sample size to resample with
    if n == 0:
        raise ValueError("cannot compute a confidence interval of no values")
    # Float division keeps int samples from being truncated on Python 2.
    value = sum(values) / float(n)  # Estimator

    # Bootstrap the sampling distribution of the mean by repeatedly drawing
    # n values with replacement and recording each resample's mean.
    # range (not xrange) keeps this runnable on both Python 2 and 3.
    bootstrap_means = []
    for _ in range(iterations):
        resample_total = sum(random.choice(values) for _ in range(n))
        bootstrap_means.append(resample_total / float(n))
    bootstrap_means.sort()

    # Compute the confidence interval using pivotal intervals: the lower
    # bound reflects the upper bootstrap quantile around the estimator, and
    # vice versa. Indices are clamped to the last element so that extreme
    # alphas (e.g. alpha=0) cannot index one past the end of the list.
    last = iterations - 1
    upper_quantile_index = min(
        int(math.floor(iterations * (1 - alpha / 2.0))), last)
    lower_quantile_index = min(
        int(math.floor(iterations * alpha / 2.0)), last)
    lower = 2 * value - bootstrap_means[upper_quantile_index]
    upper = 2 * value - bootstrap_means[lower_quantile_index]
    return {'lower': lower,
            'upper': upper,
            'value': value,
            'num': n}

def main(num_iterations):
    """Run the adaptive bootstrap on a freshly generated random sample.

    Args:
        num_iterations: Despite the name, this is the size of the sample:
            that many random integers in [0, 1000] are generated.

    Returns:
        The dict returned by adaptive_confidence_interval() for the sample.
    """
    # range (not xrange) keeps this runnable on both Python 2 and 3.
    values = [random.randint(0, 1000) for _ in range(num_iterations)]
    return adaptive_confidence_interval(values)

# Command-line entry point: run main() under the profiler named by the first
# argument, on a sample whose size is given by the second argument.
if __name__ == '__main__':
    if len(sys.argv) < 3:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Usage: python confidence <profiler> <num_iterations>")
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive interpreter and is absent when run with -S.
        sys.exit(1)
    profiler = sys.argv[1]
    num_iterations = int(sys.argv[2])
    if profiler == 'cProfile':
        # Stats are written to a file for later inspection with pstats.
        cProfile.runctx('main(num_iterations)', globals(), locals(),
                        filename='confidence.py.cProfile')
    elif profiler == 'statprof':
        # Not in standard library -- pip install statprof
        import statprof
        statprof.start()
        try:
            main(num_iterations)
        finally:
            # Always stop sampling, even if main() raises.
            statprof.stop()
        # No persistent output -- just display the results.
        statprof.display()
    elif profiler == 'line_profiler':
        # Not in standard library -- pip install line_profiler
        #
        # CLI only. Add the @profile decorator to functions above, and run
        # from the command line like so (kernprof options go before the
        # script name; everything after it is passed to this script):
        #    kernprof.py -l -o confidence.py.lprof confidence.py line_profiler <num_iterations>
        # And view the results:
        #    python -m line_profiler confidence.py.lprof
        main(num_iterations)
        print('good')
    else:
        print('unknown profiler type')
        sys.exit(1)
	import cProfile
	import sys
	import math
	import numpy
	import random

	def mean(values):
	    """Return the arithmetic mean of ``values``, or None if it is empty.

	    The division is always carried out in floating point, so a sequence
	    of ints is not truncated by Python 2's integer division.

	    Args:
	        values: A sized sequence of numbers (e.g. a list).

	    Returns:
	        The mean as a float, or None when ``values`` is empty.
	    """
	    if not values:
	        return None
	    # float() forces true division even when every element is an int.
	    return sum(values) / float(len(values))

	def adaptive_confidence_interval(values, iterations=1000, alpha=0.05):
	    """Bootstrap a confidence interval using as few iterations as needed.

	    Starting at 10 bootstrap iterations, NUM_TRIES independent intervals
	    are computed with confidence_interval() and their widths compared.
	    While the widths disagree by more than THRESHOLD (relative to their
	    mean width) and the iteration budget is not exhausted, the iteration
	    count is doubled, capped at ``iterations``, and the batch is
	    recomputed.

	    Args:
	        values: Sample to bootstrap; passed to confidence_interval().
	        iterations: Upper bound on the bootstrap iterations per interval.
	        alpha: Significance level; passed to confidence_interval().

	    Returns:
	        The dict returned by confidence_interval() for the first interval
	        of the final batch.
	    """
	    NUM_TRIES = 4
	    THRESHOLD = 0.1
	    cur_iterations = 10
	    while True:
	        cis = [confidence_interval(values, iterations=cur_iterations,
	                                   alpha=alpha)
	               for _ in range(NUM_TRIES)]
	        widths = [ci['upper'] - ci['lower'] for ci in cis]
	        spread = max(widths) - min(widths)
	        mean_width = sum(widths) / float(len(widths))
	        # Compare with a multiplication instead of spread / mean_width so
	        # that degenerate, zero-width intervals (e.g. a constant sample)
	        # count as converged rather than raising ZeroDivisionError.
	        converged = spread <= THRESHOLD * mean_width
	        if converged or cur_iterations >= iterations:
	            # Just pick one and return it
	            return cis[0]
	        # Double the effort, but never exceed the caller's budget.
	        cur_iterations = min(cur_iterations * 2, iterations)


	def confidence_interval(values, iterations=1000, alpha=0.05):
	    """Return a bootstrap (pivotal) confidence interval of the mean.

	    ``values`` is resampled with replacement ``iterations`` times, each
	    resample having the same size as ``values``. The sorted resample
	    means are then reflected around the sample mean to form the interval.

	    Args:
	        values: Non-empty, indexable sequence of numbers.
	        iterations: Number of bootstrap resamples to draw.
	        alpha: Significance level (0.05 gives a 95% interval).

	    Returns:
	        A dict with keys 'lower' and 'upper' (interval bounds), 'value'
	        (the sample mean) and 'num' (the sample size).

	    Raises:
	        ValueError: If ``values`` is empty.
	    """
	    n = len(values)  # Sample size to resample with
	    if n == 0:
	        raise ValueError("cannot compute a confidence interval of no values")
	    # Float division keeps int samples from being truncated on Python 2.
	    value = sum(values) / float(n)  # Estimator

	    # Bootstrap the sampling distribution of the mean by repeatedly
	    # drawing n values with replacement and recording each resample's
	    # mean. range (not xrange) keeps this runnable on Python 2 and 3.
	    bootstrap_means = []
	    for _ in range(iterations):
	        resample_total = sum(random.choice(values) for _ in range(n))
	        bootstrap_means.append(resample_total / float(n))
	    bootstrap_means.sort()

	    # Compute the confidence interval using pivotal intervals: the lower
	    # bound reflects the upper bootstrap quantile around the estimator,
	    # and vice versa. Indices are clamped to the last element so that
	    # extreme alphas (e.g. alpha=0) cannot index past the end of the list.
	    last = iterations - 1
	    upper_quantile_index = min(
	        int(math.floor(iterations * (1 - alpha / 2.0))), last)
	    lower_quantile_index = min(
	        int(math.floor(iterations * alpha / 2.0)), last)
	    lower = 2 * value - bootstrap_means[upper_quantile_index]
	    upper = 2 * value - bootstrap_means[lower_quantile_index]
	    return {'lower': lower,
	            'upper': upper,
	            'value': value,
	            'num': n}

	def main(num_iterations):
	    """Run the adaptive bootstrap on a freshly generated random sample.

	    Args:
	        num_iterations: Despite the name, this is the size of the sample:
	            that many random integers in [0, 1000] are generated.

	    Returns:
	        The dict returned by adaptive_confidence_interval() for the
	        sample.
	    """
	    # range (not xrange) keeps this runnable on both Python 2 and 3.
	    values = [random.randint(0, 1000) for _ in range(num_iterations)]
	    return adaptive_confidence_interval(values)

	# Command-line entry point: run main() under the profiler named by the
	# first argument, on a sample whose size is given by the second argument.
	if __name__ == '__main__':
	    if len(sys.argv) < 3:
	        # Single-argument print(...) behaves the same on Python 2 and 3.
	        print("Usage: python confidence <profiler> <num_iterations>")
	        # sys.exit rather than the site-provided exit(), which is meant
	        # for the interactive interpreter and is absent when run with -S.
	        sys.exit(1)
	    profiler = sys.argv[1]
	    num_iterations = int(sys.argv[2])
	    if profiler == 'cProfile':
	        # Stats are written to a file for later inspection with pstats.
	        cProfile.runctx('main(num_iterations)', globals(), locals(),
	                        filename='confidence.py.cProfile')
	    elif profiler == 'statprof':
	        # Not in standard library -- pip install statprof
	        import statprof
	        statprof.start()
	        try:
	            main(num_iterations)
	        finally:
	            # Always stop sampling, even if main() raises.
	            statprof.stop()
	        # No persistent output -- just display the results.
	        statprof.display()
	    elif profiler == 'line_profiler':
	        # Not in standard library -- pip install line_profiler
	        #
	        # CLI only. Add the @profile decorator to functions above, and run
	        # from the command line like so (kernprof options go before the
	        # script name; everything after it is passed to this script):
	        #    kernprof.py -l -o confidence.py.lprof confidence.py line_profiler <num_iterations>
	        # And view the results:
	        #    python -m line_profiler confidence.py.lprof
	        main(num_iterations)
	        print('good')
	    else:
	        print('unknown profiler type')
	        sys.exit(1)