tingletech/bayesian_survey_analysis.py

## bayesian_survey_analysis.py
#!/usr/bin/env python

"""
porting R code to python
http://stats.stackexchange.com/questions/39319/bayesian-user-survey-with-a-credible-interval
"""

# pip install numpy
from numpy.random.mtrand import dirichlet
from numpy import array, sum, percentile

categories = [
    'k-12 teacher or librarian',
    'k-12 student',
    'college or graduate student',
    'faculty or academic researcher',
    'archivist or librarian',
    'genealogist or family researcher',
    'other'
]

observations = [65, 71, 532, 307, 369, 234, 584]

# just checking
assert len(categories) == len(observations)

# run 1000 simulations
simulations = 1000

# set up a standard library array to hold the simulation results
results = []

# monte carlo method
for n in xrange(simulations):
    results.append( dirichlet([x+1 for x in observations]) )

# convert results to a numpy array for better indexing/sums
results = array(results)

# calculate estimates
estimates = sum(results, axis=0) / simulations

# calculate credible interval and print results
for k in xrange(len(observations)):
    q025 = "{:.0%}".format(percentile(results[:,k],  2.5))
    q975 = "{:.0%}".format(percentile(results[:,k], 97.5))
    percent = "{:.0%}".format(estimates[k])
    print categories[k], q025, percent, q975
	#!/usr/bin/env python

	"""
	porting R code to python
	http://stats.stackexchange.com/questions/39319/bayesian-user-survey-with-a-credible-interval
	"""

	# pip install numpy
	from numpy.random.mtrand import dirichlet
	from numpy import array, sum, percentile

	categories = [
	'k-12 teacher or librarian',
	'k-12 student',
	'college or graduate student',
	'faculty or academic researcher',
	'archivist or librarian',
	'genealogist or family researcher',
	'other'
	]

	observations = [65, 71, 532, 307, 369, 234, 584]

	# just checking
	assert len(categories) == len(observations)

	# run 1000 simulations
	simulations = 1000

	# set up a standard library array to hold the simulation results
	results = []

	# monte carlo method
	for n in xrange(simulations):
	results.append( dirichlet([x+1 for x in observations]) )

	# convert results to a numpy array for better indexing/sums
	results = array(results)

	# calculate estimates
	estimates = sum(results, axis=0) / simulations

	# calculate credible interval and print results
	for k in xrange(len(observations)):
	q025 = "{:.0%}".format(percentile(results[:,k], 2.5))
	q975 = "{:.0%}".format(percentile(results[:,k], 97.5))
	percent = "{:.0%}".format(estimates[k])
	print categories[k], q025, percent, q975