mattpitkin/roqupdatetest.py

## roqupdatetest.py
from __future__ import print_function, division

import numpy as np
from scipy.signal import fftconvolve
import subprocess as sp
import os
import sys
import gzip
import h5py
from time import time

from scotchcorner import scotchcorner

# these modules require lalapps
from lalapps.pulsarpputils import coord_to_string, rad_to_hms, rad_to_dms
from lalinference.io import read_samples
import lalinference

# some matplotlib configurations
mplparams = { \
      'backend': 'Agg',
      'text.usetex': True, # use LaTeX for all text
      'axes.linewidth': 0.5, # set axes linewidths to 0.5
      'axes.grid': True, # add a grid
      'grid.linewidth': 0.5,
      'font.family': 'sans-serif',
      'font.sans-serif': 'Avant Garde, Helvetica, Computer Modern Sans serif',
      'font.size': 15 }

# set up the run directories
rundir = '.'
if not os.path.isdir(rundir): # make the directory
    os.makedirs(rundir)

detector = 'H1'        # the detector to use
psrname = 'J0000+0000' # a fake pulsar name

# set the output directory
outdir = os.path.join(rundir, 'output')
if not os.path.isdir(outdir):
    os.makedirs(outdir)

# fake heterodyned data directory (for the lalapps_pulsar_parameter_estimation code
# this must be dataDET, where DET is e.g. H1)
datadir = os.path.join(rundir, 'data'+detector)
if not os.path.isdir(datadir): # make the directory
    os.makedirs(datadir)

# set the executables (assuming you are in a virtual environment)
execpath = os.path.join(os.environ['VIRTUAL_ENV'], 'bin')

ppenexec = os.path.join(execpath, 'lalapps_pulsar_parameter_estimation_nested')
n2pexec = os.path.join(execpath, 'lalapps_nest2pos') # script to convert nested samples to posterior samples

# set up some general inputs

# create a pulsar parameter (TEMPO-stype .par file) file format string
pardat = """PSRJ J0000+0000
RAJ {}
DECJ {}
F0 123.4567890
F1 -1.0e-12
PEPOCH 56789.0
EPHEM DE405
"""

# some defaults for the data generation
sigma = 1.0e-22        # set data standard deviation
dt = 60                # number of seconds between data points
gpsstart = 1125969920  # GPS start time of data (over a year after the pulsar period epoch)
duration = 86400      # duration of data (seconds) - 1 days
gpstimes = np.arange(gpsstart, gpsstart+duration, dt) # time stamps
dlen = len(gpstimes)   # length of data

# get an estimate of the 95% credible upper limit to be expected
ulest = 10.8*np.sqrt(sigma**2/dlen)

# create the prior file for the lalapps_pulsar_parameter_estimation_nested code
# the F0 and F1 parameters have Gaussian prior distributions
f0mu = 123.4567890
f0sigma = 1e-7
f1mu = -1e-12
f1sigma = 1e-13

priorfile = os.path.join(rundir, 'pulsar.prior')
priordat = """H0 uniform 0 {}
PHI0 uniform 0 {}
PSI uniform 0 {}
COSIOTA uniform -1 1
F0 gaussian {:.7f} {:.1e}
F1 gaussian {:.1e} {:.1e}
"""
fp = open(priorfile, 'w')
# set the h0 upper range to be 6 times the expected upper limit
fp.write(priordat.format(ulest*6., np.pi, np.pi/2., f0mu, f0sigma, f1mu, f1sigma))
fp.close()

# lalapps_pulsar_parameter_estimation_nested run parameters
Nlive = '2048' # number of nested sample live points

# ROQ setup
ntraining = '1500'     # the number of training models to form the reduced basis
roqtolerance = '1e-12' # the maximum allowed residuals between the training set and reduced basis
roqfile = os.path.join(datadir, 'roq.bin') # a file to output the ROQ intepolants and nodes

# create data
data = sigma*np.random.randn(dlen, 2)

# append times and data together
tad = np.vstack((gpstimes, data.T)).T

# output fake data
datafile = os.path.join(datadir, 'finehet_'+psrname+'_'+detector)
np.savetxt(datafile, tad, fmt='%.6f %.7e %.7e', delimiter='\t')

# create a random sky position from a uniform distribution on the sky
rah, ram, ras = rad_to_hms(2.*np.pi*np.random.rand())
decd, decm, decs = rad_to_dms(np.arccos(-1.+2.*np.random.rand()) - np.pi/2.)

# output .par file containing right ascension and declination
parfile = os.path.join(rundir, 'pulsar.par')
fp = open(parfile, 'w')
fp.write(pardat.format(coord_to_string(rah, ram, ras), coord_to_string(decd, decm, decs)))
fp.close()

# run lalapps_pulsar_parameter_estimation_nested in its standard mode
codecall = ' '.join([ppenexec, '--detectors', detector,
                     '--par-file', parfile, '--prior-file', priorfile,
                     '--input-files', datafile, '--outfile', os.path.join(outdir, 'fake_nest.hdf'),
                     '--Nlive', Nlive, '--Nmcmcinitial', '0'])
print(codecall)
t0 = time()
p = sp.Popen(codecall, stdout=sp.PIPE, stderr=sp.PIPE, shell=True)
out, err = p.communicate()
t1 = time()
timenested = (t1-t0)

# nested samples need to be converted to posterior samples with lalapps_nest2pos
if os.path.isfile(os.path.join(outdir, 'fake_post.hdf')):
    os.remove(os.path.join(outdir, 'fake_post.hdf'))
codecall = ' '.join([n2pexec, '-p', os.path.join(outdir, 'fake_post.hdf'),
                     os.path.join(outdir, 'fake_nest.hdf')])
p = sp.Popen(codecall, stdout=sp.PIPE, stderr=sp.PIPE, shell=True)
out, err = p.communicate()

post = read_samples(os.path.join(outdir, 'fake_post.hdf'), tablename=lalinference.LALInferenceHDF5PosteriorSamplesDatasetName)

# get the maximum likelihood
evfile = h5py.File(os.path.join(outdir, 'fake_post.hdf'), 'r')
maxlike = evfile['lalinference']['lalinference_nest'].attrs['log_max_likelihood']
evfile.close()

# run lalapps_pulsar_parameter_estimation using the ROQ (first run to generate the ROQ)
codecall = ' '.join([ppenexec, '--detectors', detector,
                     '--par-file', parfile, '--prior-file', priorfile,
                     '--input-files', datafile, '--outfile', os.path.join(outdir, 'fake_nest_roq.hdf'),
                     '--Nlive', Nlive, '--Nmcmcinitial', '0', '--roq', '--roq-tolerance',
                     roqtolerance, '--ntraining', ntraining, '--output-weights', roqfile])
print(codecall)

t0 = time()
p = sp.Popen(codecall, stdout=sp.PIPE, stderr=sp.PIPE, shell=True)
out, err = p.communicate()
t1 = time()
timegenerateroq = (t1-t0)

# run lalapps_pulsar_parameter_estimation using the ROQ (using pregenerated ROQ)
codecall = ' '.join([ppenexec, '--detectors', detector,
                     '--par-file', parfile, '--prior-file', priorfile,
                     '--input-files', datafile, '--outfile', os.path.join(outdir, 'fake_nest_roq.hdf'),
                     '--Nlive', Nlive, '--Nmcmcinitial', '0', '--roq',
                     '--input-weights', roqfile])
print(codecall)

t0 = time()
p = sp.Popen(codecall, stdout=sp.PIPE, stderr=sp.PIPE, shell=True)
out, err = p.communicate()
t1 = time()
timeroq = (t1-t0)

# nested samples need to be converted to posterior samples with lalapps_nest2pos
if os.path.isfile(os.path.join(outdir, 'fake_post_roq.hdf')):
    os.remove(os.path.join(outdir, 'fake_post_roq.hdf'))
codecall = ' '.join([n2pexec, '-p', os.path.join(outdir, 'fake_post_roq.hdf'),
                     os.path.join(outdir, 'fake_nest_roq.hdf')])
p = sp.Popen(codecall, stdout=sp.PIPE, stderr=sp.PIPE, shell=True)
out, err = p.communicate()

# read in samples
postroq = read_samples(os.path.join(outdir, 'fake_post_roq.hdf'), tablename=lalinference.LALInferenceHDF5PosteriorSamplesDatasetName)

# get the maximum likelihood
evfileroq = h5py.File(os.path.join(outdir, 'fake_nest_roq.hdf'), 'r')
maxlikeroq = evfileroq['lalinference']['lalinference_nest'].attrs['log_max_likelihood']
evfileroq.close()

# get required parameter columns
params = ['H0', 'PHI0', 'PSI', 'COSIOTA', 'F0', 'F1']

# lalapps_pulsar_parameter_estimation_nested posterior samples
postppen = np.zeros((len(post['H0']), len(params)))
postppenroq = np.zeros((len(postroq['H0']), len(params)))
for i, p in enumerate(params):
    postppen[:,i] = post[p]
    postppenroq[:,i] = postroq[p]

histops = {'histtype': 'step', 'color': 'b', 'edgecolor': 'b', 'linewidth': 1.5, 'alpha': 1.0}
labels = [r'$h_0$', r'$\phi_0$ (rads)', r'$\psi$ (rads)', r'$\cos{\iota}$', r'$f_0$ (Hz)', r'$\dot{f}_0$ (Hz/s)']
limits = [(0., None), (0., np.pi), (0., np.pi/2.), (-1., 1.), (None, None), (None, None)]

# subtract means from f0 and f1
postppen[:,params.index('F0')] = postppen[:,params.index('F0')] - f0mu
postppen[:,params.index('F1')] = postppen[:,params.index('F1')] - f1mu
postppenroq[:,params.index('F0')] = postppenroq[:,params.index('F0')] - f0mu
postppenroq[:,params.index('F1')] = postppenroq[:,params.index('F1')] - f1mu

# add the standard run
sc = scotchcorner(postppen, bins=20, ratio=2, labels=labels, datatitle='Standard', showlims='both', hist_kwargs=histops,
                  showcontours=False, limits=limits, showpoints=True, scatter_kwargs={'alpha': 0.1})
# add the ROQ run
histops = {'histtype': 'step', 'color': 'k', 'linewidth': 1.5}
sc.add_data(postppenroq, hist_kwargs=histops, datatitle='ROQ', showcontours=False, scatter_kwargs={'alpha': 0.1}, limits=limits)

# add F0 and F1 prior distributions
from scipy.stats import norm
axf0 = sc.get_axis('$f_0$ (Hz)')
f0range = np.linspace(axf0.get_xlim()[0], axf0.get_xlim()[1], 100)
axf0.plot(f0range, norm.pdf(f0range, 0, f0sigma), 'g--', lw=1.5)

axf1 = sc.get_axis('$\dot{f}_0$ (Hz/s)')
f1range = np.linspace(axf1.get_ylim()[0], axf1.get_ylim()[1], 100)
axf1.plot(norm.pdf(f1range, 0, f1sigma), f1range, 'g--', lw=1.5)
	from __future__ import print_function, division

	import numpy as np
	from scipy.signal import fftconvolve
	import subprocess as sp
	import os
	import sys
	import gzip
	import h5py
	from time import time

	from scotchcorner import scotchcorner

	# these modules require lalapps
	from lalapps.pulsarpputils import coord_to_string, rad_to_hms, rad_to_dms
	from lalinference.io import read_samples
	import lalinference

	# some matplotlib configurations
	mplparams = { \
	'backend': 'Agg',
	'text.usetex': True, # use LaTeX for all text
	'axes.linewidth': 0.5, # set axes linewidths to 0.5
	'axes.grid': True, # add a grid
	'grid.linewidth': 0.5,
	'font.family': 'sans-serif',
	'font.sans-serif': 'Avant Garde, Helvetica, Computer Modern Sans serif',
	'font.size': 15 }

	# set up the run directories
	rundir = '.'
	if not os.path.isdir(rundir): # make the directory
	os.makedirs(rundir)

	detector = 'H1' # the detector to use
	psrname = 'J0000+0000' # a fake pulsar name

	# set the output directory
	outdir = os.path.join(rundir, 'output')
	if not os.path.isdir(outdir):
	os.makedirs(outdir)

	# fake heterodyned data directory (for the lalapps_pulsar_parameter_estimation code
	# this must be dataDET, where DET is e.g. H1)
	datadir = os.path.join(rundir, 'data'+detector)
	if not os.path.isdir(datadir): # make the directory
	os.makedirs(datadir)

	# set the executables (assuming you are in a virtual environment)
	execpath = os.path.join(os.environ['VIRTUAL_ENV'], 'bin')

	ppenexec = os.path.join(execpath, 'lalapps_pulsar_parameter_estimation_nested')
	n2pexec = os.path.join(execpath, 'lalapps_nest2pos') # script to convert nested samples to posterior samples

	# set up some general inputs

	# create a pulsar parameter (TEMPO-stype .par file) file format string
	pardat = """PSRJ J0000+0000
	RAJ {}
	DECJ {}
	F0 123.4567890
	F1 -1.0e-12
	PEPOCH 56789.0
	EPHEM DE405
	"""

	# some defaults for the data generation
	sigma = 1.0e-22 # set data standard deviation
	dt = 60 # number of seconds between data points
	gpsstart = 1125969920 # GPS start time of data (over a year after the pulsar period epoch)
	duration = 86400 # duration of data (seconds) - 1 days
	gpstimes = np.arange(gpsstart, gpsstart+duration, dt) # time stamps
	dlen = len(gpstimes) # length of data

	# get an estimate of the 95% credible upper limit to be expected
	ulest = 10.8np.sqrt(sigma*2/dlen)

	# create the prior file for the lalapps_pulsar_parameter_estimation_nested code
	# the F0 and F1 parameters have Gaussian prior distributions
	f0mu = 123.4567890
	f0sigma = 1e-7
	f1mu = -1e-12
	f1sigma = 1e-13

	priorfile = os.path.join(rundir, 'pulsar.prior')
	priordat = """H0 uniform 0 {}
	PHI0 uniform 0 {}
	PSI uniform 0 {}
	COSIOTA uniform -1 1
	F0 gaussian {:.7f} {:.1e}
	F1 gaussian {:.1e} {:.1e}
	"""
	fp = open(priorfile, 'w')
	# set the h0 upper range to be 6 times the expected upper limit
	fp.write(priordat.format(ulest*6., np.pi, np.pi/2., f0mu, f0sigma, f1mu, f1sigma))
	fp.close()

	# lalapps_pulsar_parameter_estimation_nested run parameters
	Nlive = '2048' # number of nested sample live points

	# ROQ setup
	ntraining = '1500' # the number of training models to form the reduced basis
	roqtolerance = '1e-12' # the maximum allowed residuals between the training set and reduced basis
	roqfile = os.path.join(datadir, 'roq.bin') # a file to output the ROQ intepolants and nodes

	# create data
	data = sigma*np.random.randn(dlen, 2)

	# append times and data together
	tad = np.vstack((gpstimes, data.T)).T

	# output fake data
	datafile = os.path.join(datadir, 'finehet_'+psrname+'_'+detector)
	np.savetxt(datafile, tad, fmt='%.6f %.7e %.7e', delimiter='\t')

	# create a random sky position from a uniform distribution on the sky
	rah, ram, ras = rad_to_hms(2.np.pinp.random.rand())
	decd, decm, decs = rad_to_dms(np.arccos(-1.+2.*np.random.rand()) - np.pi/2.)

	# output .par file containing right ascension and declination
	parfile = os.path.join(rundir, 'pulsar.par')
	fp = open(parfile, 'w')
	fp.write(pardat.format(coord_to_string(rah, ram, ras), coord_to_string(decd, decm, decs)))
	fp.close()

	# run lalapps_pulsar_parameter_estimation_nested in its standard mode
	codecall = ' '.join([ppenexec, '--detectors', detector,
	'--par-file', parfile, '--prior-file', priorfile,
	'--input-files', datafile, '--outfile', os.path.join(outdir, 'fake_nest.hdf'),
	'--Nlive', Nlive, '--Nmcmcinitial', '0'])
	print(codecall)
	t0 = time()
	p = sp.Popen(codecall, stdout=sp.PIPE, stderr=sp.PIPE, shell=True)
	out, err = p.communicate()
	t1 = time()
	timenested = (t1-t0)

	# nested samples need to be converted to posterior samples with lalapps_nest2pos
	if os.path.isfile(os.path.join(outdir, 'fake_post.hdf')):
	os.remove(os.path.join(outdir, 'fake_post.hdf'))
	codecall = ' '.join([n2pexec, '-p', os.path.join(outdir, 'fake_post.hdf'),
	os.path.join(outdir, 'fake_nest.hdf')])
	p = sp.Popen(codecall, stdout=sp.PIPE, stderr=sp.PIPE, shell=True)
	out, err = p.communicate()

	post = read_samples(os.path.join(outdir, 'fake_post.hdf'), tablename=lalinference.LALInferenceHDF5PosteriorSamplesDatasetName)

	# get the maximum likelihood
	evfile = h5py.File(os.path.join(outdir, 'fake_post.hdf'), 'r')
	maxlike = evfile['lalinference']['lalinference_nest'].attrs['log_max_likelihood']
	evfile.close()

	# run lalapps_pulsar_parameter_estimation using the ROQ (first run to generate the ROQ)
	codecall = ' '.join([ppenexec, '--detectors', detector,
	'--par-file', parfile, '--prior-file', priorfile,
	'--input-files', datafile, '--outfile', os.path.join(outdir, 'fake_nest_roq.hdf'),
	'--Nlive', Nlive, '--Nmcmcinitial', '0', '--roq', '--roq-tolerance',
	roqtolerance, '--ntraining', ntraining, '--output-weights', roqfile])
	print(codecall)

	t0 = time()
	p = sp.Popen(codecall, stdout=sp.PIPE, stderr=sp.PIPE, shell=True)
	out, err = p.communicate()
	t1 = time()
	timegenerateroq = (t1-t0)

	# run lalapps_pulsar_parameter_estimation using the ROQ (using pregenerated ROQ)
	codecall = ' '.join([ppenexec, '--detectors', detector,
	'--par-file', parfile, '--prior-file', priorfile,
	'--input-files', datafile, '--outfile', os.path.join(outdir, 'fake_nest_roq.hdf'),
	'--Nlive', Nlive, '--Nmcmcinitial', '0', '--roq',
	'--input-weights', roqfile])
	print(codecall)

	t0 = time()
	p = sp.Popen(codecall, stdout=sp.PIPE, stderr=sp.PIPE, shell=True)
	out, err = p.communicate()
	t1 = time()
	timeroq = (t1-t0)

	# nested samples need to be converted to posterior samples with lalapps_nest2pos
	if os.path.isfile(os.path.join(outdir, 'fake_post_roq.hdf')):
	os.remove(os.path.join(outdir, 'fake_post_roq.hdf'))
	codecall = ' '.join([n2pexec, '-p', os.path.join(outdir, 'fake_post_roq.hdf'),
	os.path.join(outdir, 'fake_nest_roq.hdf')])
	p = sp.Popen(codecall, stdout=sp.PIPE, stderr=sp.PIPE, shell=True)
	out, err = p.communicate()

	# read in samples
	postroq = read_samples(os.path.join(outdir, 'fake_post_roq.hdf'), tablename=lalinference.LALInferenceHDF5PosteriorSamplesDatasetName)

	# get the maximum likelihood
	evfileroq = h5py.File(os.path.join(outdir, 'fake_nest_roq.hdf'), 'r')
	maxlikeroq = evfileroq['lalinference']['lalinference_nest'].attrs['log_max_likelihood']
	evfileroq.close()

	# get required parameter columns
	params = ['H0', 'PHI0', 'PSI', 'COSIOTA', 'F0', 'F1']

	# lalapps_pulsar_parameter_estimation_nested posterior samples
	postppen = np.zeros((len(post['H0']), len(params)))
	postppenroq = np.zeros((len(postroq['H0']), len(params)))
	for i, p in enumerate(params):
	postppen[:,i] = post[p]
	postppenroq[:,i] = postroq[p]

	histops = {'histtype': 'step', 'color': 'b', 'edgecolor': 'b', 'linewidth': 1.5, 'alpha': 1.0}
	labels = [r'$h_0$', r'$\phi_0$ (rads)', r'$\psi$ (rads)', r'$\cos{\iota}$', r'$f_0$ (Hz)', r'$\dot{f}_0$ (Hz/s)']
	limits = [(0., None), (0., np.pi), (0., np.pi/2.), (-1., 1.), (None, None), (None, None)]

	# subtract means from f0 and f1
	postppen[:,params.index('F0')] = postppen[:,params.index('F0')] - f0mu
	postppen[:,params.index('F1')] = postppen[:,params.index('F1')] - f1mu
	postppenroq[:,params.index('F0')] = postppenroq[:,params.index('F0')] - f0mu
	postppenroq[:,params.index('F1')] = postppenroq[:,params.index('F1')] - f1mu

	# add the standard run
	sc = scotchcorner(postppen, bins=20, ratio=2, labels=labels, datatitle='Standard', showlims='both', hist_kwargs=histops,
	showcontours=False, limits=limits, showpoints=True, scatter_kwargs={'alpha': 0.1})
	# add the ROQ run
	histops = {'histtype': 'step', 'color': 'k', 'linewidth': 1.5}
	sc.add_data(postppenroq, hist_kwargs=histops, datatitle='ROQ', showcontours=False, scatter_kwargs={'alpha': 0.1}, limits=limits)

	# add F0 and F1 prior distributions
	from scipy.stats import norm
	axf0 = sc.get_axis('$f_0$ (Hz)')
	f0range = np.linspace(axf0.get_xlim()[0], axf0.get_xlim()[1], 100)
	axf0.plot(f0range, norm.pdf(f0range, 0, f0sigma), 'g--', lw=1.5)

	axf1 = sc.get_axis('$\dot{f}_0$ (Hz/s)')
	f1range = np.linspace(axf1.get_ylim()[0], axf1.get_ylim()[1], 100)
	axf1.plot(norm.pdf(f1range, 0, f1sigma), f1range, 'g--', lw=1.5)