JohannesBuchner/toy.py

## toy.py
import numpy
from numpy import log, isnan, isfinite, sin, cos, tan, abs, any, pi
import scipy, scipy.stats
import pymultinest
import json
import sys
import matplotlib.pyplot as plt

numpy.random.seed(1)
outputfiles_basename = "mnchains_toy_"

print 'loading data...'
data = numpy.loadtxt(sys.argv[1],
	dtype=[(colname, 'f') for colname in 'x', 'x_err', 'y', 'y_err', 'cor'],
	skiprows=1)

allsamples = []

# convert each data point description into many samples from its error distribution
for row in data:
	x, x_err, y, y_err, cor = row
	cov = [[x_err**2, cor*(x_err*y_err)], [cor*(x_err*y_err), y_err**2]]
	allsamples.append(scipy.random.multivariate_normal([x, y], cov, size=40))

allsamples = numpy.array(allsamples)

x = allsamples[:,:,0]
y = allsamples[:,:,1]
print 'loading data... done'

def prior(cube, ndim, nparams):
	# angle of line
	cube[0] = (cube[0] - 0.5) * pi
	# origin
	cube[1] = cube[1] - 0.5
	# intrinsic scatter, orthogonal to line; log
	cube[2] = cube[2] * 3 - 2

plot = False

def likelihood(cube, ndim, nparams):
	try:
		angle = cube[0]
		origin = cube[1]
		logscatter = cube[2]
		rv = scipy.stats.norm(0, 10**logscatter)
		#rv = scipy.stats.t(1, 0, 10**logscatter)

		# compute the density at x/y of the postulated line relation
		# compute distance from line
		xline = ((y - origin) / sin(angle)) * cos(angle)
		distance = abs(x - xline) * tan(angle)
		rowlike = rv.pdf(distance).mean(axis=1)
		totalloglike = numpy.log(rowlike).sum()
		if plot:
			print xline.shape, y.shape
			plt.plot(xline, y, '+', color='orange')
		#print totalloglike, angle, origin, logscatter, numpy.abs(distance).max()
		if not isfinite(totalloglike): # NaNs, Infs
			return -1e300
		else:
			return totalloglike
	except Exception as e:
		print e
		sys.exit(1)

# number of dimensions our problem has
parameters = ["angle", "origin", "logscatter"]
n_params = len(parameters)
# run MultiNest
pymultinest.run(likelihood, prior, n_params, outputfiles_basename=outputfiles_basename,
	importance_nested_sampling = False, n_live_points=400, resume = True, verbose = True,
	mode_tolerance=1)

# lets analyse the results
a = pymultinest.Analyzer(n_params = n_params, outputfiles_basename=outputfiles_basename)
stats = a.get_stats()
# store name of parameters, always useful
with file('%sparams.json' % a.outputfiles_basename, 'w') as f:
	json.dump(parameters, f, indent=2)
# store derived stats
with open('%sstats.json' % a.outputfiles_basename, mode='w') as f:
	json.dump(stats, f, indent=2)


plt.figure(figsize=(7,7))
plt.plot(data['x'], data['y'], 'x', ms=3, color='k', alpha=0.1)
plt.xlim(data['x'].min() - 0.1, data['x'].max() + 0.1)
plt.ylim(data['y'].min() - 0.1, data['y'].max() + 0.1)
plt.xlabel('x')
plt.ylabel('y')

def plot_line(angle, origin, logscatter, **kwargs):
	t = numpy.linspace(-10, 10, 40)
	x = t * cos(angle)
	y = t * sin(angle) + origin
	plt.plot(x, y, '-', **kwargs)
	x1 = x + 10**logscatter * sin(angle)
	y1 = y + 10**logscatter * cos(angle)
	plt.plot(x1, y1, '--', **kwargs)
	x1 = x - 10**logscatter * sin(angle)
	y1 = y - 10**logscatter * cos(angle)
	plt.plot(x1, y1, '--', **kwargs)

for angle, origin, logscatter in a.get_equal_weighted_posterior()[:40,:-1]:
	plot_line(angle, origin, logscatter, color='red', alpha=0.1)


ax = plt.gcf().add_axes([0.55, 0.6, 0.3, 0.25])
x = a.get_equal_weighted_posterior()[:,1]
y = a.get_equal_weighted_posterior()[:,0] / pi * 180
ax.plot(x, y, 'x ', color='k')
plt.locator_params(nbins=4)
ax.set_ylabel('angle [degree]')
ax.set_xlabel('y offset')

ax = plt.gcf().add_axes([0.55, 0.17, 0.3, 0.2])
ax.hist(a.get_equal_weighted_posterior()[:,2], color='k', histtype='step')
plt.locator_params(nbins=4)
ax.set_xlabel('log scatter')
ax.set_yticks([])

plt.savefig(outputfiles_basename + 'predict.pdf', bbox_inches='tight')
plt.savefig(outputfiles_basename + 'predict.png', bbox_inches='tight')
plt.close()
	import numpy
	from numpy import log, isnan, isfinite, sin, cos, tan, abs, any, pi
	import scipy, scipy.stats
	import pymultinest
	import json
	import sys
	import matplotlib.pyplot as plt

	numpy.random.seed(1)
	outputfiles_basename = "mnchains_toy_"

	print 'loading data...'
	data = numpy.loadtxt(sys.argv[1],
	dtype=[(colname, 'f') for colname in 'x', 'x_err', 'y', 'y_err', 'cor'],
	skiprows=1)

	allsamples = []

	# convert each data point description into many samples from its error distribution
	for row in data:
	x, x_err, y, y_err, cor = row
	cov = [[x_err*2, cor(x_erry_err)], [cor(x_erry_err), y_err*2]]
	allsamples.append(scipy.random.multivariate_normal([x, y], cov, size=40))

	allsamples = numpy.array(allsamples)

	x = allsamples[:,:,0]
	y = allsamples[:,:,1]
	print 'loading data... done'

	def prior(cube, ndim, nparams):
	# angle of line
	cube[0] = (cube[0] - 0.5) * pi
	# origin
	cube[1] = cube[1] - 0.5
	# intrinsic scatter, orthogonal to line; log
	cube[2] = cube[2] * 3 - 2

	plot = False

	def likelihood(cube, ndim, nparams):
	try:
	angle = cube[0]
	origin = cube[1]
	logscatter = cube[2]
	rv = scipy.stats.norm(0, 10**logscatter)
	#rv = scipy.stats.t(1, 0, 10**logscatter)

	# compute the density at x/y of the postulated line relation
	# compute distance from line
	xline = ((y - origin) / sin(angle)) * cos(angle)
	distance = abs(x - xline) * tan(angle)
	rowlike = rv.pdf(distance).mean(axis=1)
	totalloglike = numpy.log(rowlike).sum()
	if plot:
	print xline.shape, y.shape
	plt.plot(xline, y, '+', color='orange')
	#print totalloglike, angle, origin, logscatter, numpy.abs(distance).max()
	if not isfinite(totalloglike): # NaNs, Infs
	return -1e300
	else:
	return totalloglike
	except Exception as e:
	print e
	sys.exit(1)

	# number of dimensions our problem has
	parameters = ["angle", "origin", "logscatter"]
	n_params = len(parameters)
	# run MultiNest
	pymultinest.run(likelihood, prior, n_params, outputfiles_basename=outputfiles_basename,
	importance_nested_sampling = False, n_live_points=400, resume = True, verbose = True,
	mode_tolerance=1)

	# lets analyse the results
	a = pymultinest.Analyzer(n_params = n_params, outputfiles_basename=outputfiles_basename)
	stats = a.get_stats()
	# store name of parameters, always useful
	with file('%sparams.json' % a.outputfiles_basename, 'w') as f:
	json.dump(parameters, f, indent=2)
	# store derived stats
	with open('%sstats.json' % a.outputfiles_basename, mode='w') as f:
	json.dump(stats, f, indent=2)


	plt.figure(figsize=(7,7))
	plt.plot(data['x'], data['y'], 'x', ms=3, color='k', alpha=0.1)
	plt.xlim(data['x'].min() - 0.1, data['x'].max() + 0.1)
	plt.ylim(data['y'].min() - 0.1, data['y'].max() + 0.1)
	plt.xlabel('x')
	plt.ylabel('y')

	def plot_line(angle, origin, logscatter, **kwargs):
	t = numpy.linspace(-10, 10, 40)
	x = t * cos(angle)
	y = t * sin(angle) + origin
	plt.plot(x, y, '-', **kwargs)
	x1 = x + 10*logscatter sin(angle)
	y1 = y + 10*logscatter cos(angle)
	plt.plot(x1, y1, '--', **kwargs)
	x1 = x - 10*logscatter sin(angle)
	y1 = y - 10*logscatter cos(angle)
	plt.plot(x1, y1, '--', **kwargs)

	for angle, origin, logscatter in a.get_equal_weighted_posterior()[:40,:-1]:
	plot_line(angle, origin, logscatter, color='red', alpha=0.1)


	ax = plt.gcf().add_axes([0.55, 0.6, 0.3, 0.25])
	x = a.get_equal_weighted_posterior()[:,1]
	y = a.get_equal_weighted_posterior()[:,0] / pi * 180
	ax.plot(x, y, 'x ', color='k')
	plt.locator_params(nbins=4)
	ax.set_ylabel('angle [degree]')
	ax.set_xlabel('y offset')

	ax = plt.gcf().add_axes([0.55, 0.17, 0.3, 0.2])
	ax.hist(a.get_equal_weighted_posterior()[:,2], color='k', histtype='step')
	plt.locator_params(nbins=4)
	ax.set_xlabel('log scatter')
	ax.set_yticks([])

	plt.savefig(outputfiles_basename + 'predict.pdf', bbox_inches='tight')
	plt.savefig(outputfiles_basename + 'predict.png', bbox_inches='tight')
	plt.close()