|
#! /usr/bin/python |
|
"""Tweaked tests of autocorrelation length. |
|
""" |
|
import sys |
|
import multiprocessing |
|
import numpy as np |
|
from emcee import ensemble |
|
|
|
CHAIN_PARAMS = [(10000, 0.8), # Autocorrelation time = 9 |
|
(20000, 0.904761904762), # Autocorrelation time = 20 |
|
(30000, 0.9354838709677), # Autocorrelation time = 30 |
|
(60000, 0.9672131147541), # Autocorrelation time = 60 |
|
(200000, 0.990050200903734685)] # Autocorrelation time = 200 |
|
|
|
|
|
def autoregressive(nsteps, phi): |
|
# set the number of walkers |
|
m_walkers = 64 |
|
# implement an AR(1) model which can be written as: |
|
# X_t = c + phi*X_(t-1) + epsilon_t |
|
# where c is some constant offset, phi is the autoregression coefficient |
|
# and epsilon_t is some mean zero random number distributed with a given |
|
# variance |
|
# this is what we want the sqrt of the variance of the time series to be |
|
sigma_total = 1.0 |
|
# this is the sqrt of the variance of the random numbers required to get the correct total variance |
|
# this works because the variance (sigma_total^2) of an AR(1) model is: sigma_e^2/(1-phi^2) |
|
# solve for sigma_e to get the expression below |
|
sigma_e = sigma_total * np.sqrt(1. -phi**2) |
|
# The (unnormalized) autocovariance function is: B(n) = E(X(T+n)*X(T)) - avg^2 = sigma_e^2/(1-phi^2)*phi^|n| |
|
# because of how we have defined sigma_e for this time series, for this particular AR(1) model |
|
# the autocovariance function collapses to B(n) = phi^|n| |
|
# the normalized autocovariance function C(t) = (B(t)/B(0)), however |
|
# B(0) = 1 for this AR(1) therefore C(t) = B(t) |
|
# the integrated autocorrelation time, when there is no noise, has two definitions |
|
# "Monte Carlo Methods in Statistical Mechanics: Foundations and New |
|
# Algorithms" by Alan D. Sokal Lectures at the Cargese Summer School on |
|
# "Functional Integration: Basics and Applications" September 1996 gives: |
|
# tau = 1/2 * (Sum[C(t),{t,-inf, inf}]) |
|
# Goodman and Weare Ensemble Samplers with Affine Invariance in Communications |
|
# in Applied Mathematics and Computational Science, Vol5, No1, pg65-80, |
|
# 2010, gives: |
|
# tau = Sum[C(t),{t,-inf, inf}] |
|
# I use the latter definition |
|
# for phi < 1, phi^|n| go to zero for large n, we can truncate the |
|
# summation before reaching n=infinity. This truncated summation would then |
|
# be implemented as: |
|
# truelength = (phi**np.abs(np.arange(-1000,1000))).sum() |
|
# However because our summation is symmetrical in n, we can "fold" our |
|
# summation into: |
|
# tau = 1+2*Sum[C(t),{t,1, inf}] |
|
# which cuts the number of calculations required, roughly in half |
|
truelength = (1.0 + (2.0 * ((phi**np.abs(np.arange(1,1000))).sum()))) |
|
# initialize the chain: |
|
chain = np.zeros((m_walkers, nsteps)) |
|
# set the first point to just a random number with variance sigma_total |
|
chain[:,0] = sigma_total * np.random.normal(size=m_walkers) |
|
# set each of the subsequent points according to the AR(1) model |
|
c = np.ones(m_walkers) |
|
for i in xrange(1, nsteps): |
|
chain[:,i] = phi * chain[:,i-1] + sigma_e * np.random.normal(size=m_walkers) + c |
|
# print 'mean, expected: ', chain.mean(), 0. |
|
# print 'standard deviation, expected: ', chain.std(), sigma_total |
|
chain.shape = (m_walkers, nsteps, 1) # make last index param dimension (1-d here) |
|
return chain, truelength |
|
|
|
|
|
def findauto(params, windowList): |
|
ns = params.shape[0] |
|
mc = params.shape[1] |
|
nparams = params.shape[2] |
|
# get the first power of two greater than the number of samples |
|
maxLen = 2**int(np.ceil(np.log2(ns))) |
|
# collapse the chain to a 2D array from the 3D that was passed (for compatibility with emcee) |
|
chain = params[:,:,0] |
|
# generate an empty array to hold each walker's autocovariance function |
|
chaincorr = np.zeros((mc,ns)) |
|
# generate an array to hold the forward fourier transforms of each walker |
|
temp = np.zeros(maxLen, dtype='complex128') |
|
# calculate the average for each walker |
|
avgs = chain.mean(axis=0) |
|
for j in xrange(mc): |
|
# calculate the forward fourier transform of the *centered* walker chain |
|
temp = np.fft.fft(chain[:,j]-avgs[j],n=maxLen) |
|
# calculate the inverse fourier transform of the squared magnitudes of |
|
# the forward transform and extract the real parts to remove numerical |
|
# precision errors that give non-zero imaginary components |
|
chaincorr[j,:] = np.fft.ifft(temp*np.conjugate(temp))[:ns].real |
|
# average the per walker auto correllation functions |
|
corr = chaincorr.mean(axis=0) |
|
# convert the averaged autocorrelation function into the normalized |
|
# autocorrelation function |
|
corr /= corr[0] |
|
# this is like the sum above to find tau, but because we multiply the whole |
|
# thing by 2, we subtract one since otherwise we double counted the t=0 pt |
|
nclust_cum = 2.*corr.cumsum() - 1. |
|
return [find_edges(nclust_cum, ns, x) for x in windowList] |
|
|
|
|
|
def find_edges(nclust_cum, ns, windowLength): |
|
# make an array of window sizes, needed for the integrated autocorrelation |
|
# time finding algorithm for noisy time series given by sokal |
|
index = np.arange(ns) |
|
# find points where the window size exceeds some safety factor times the |
|
# cumulative sum |
|
end = index > windowLength*nclust_cum |
|
if end.any(): |
|
# the cumulative sum at the first point fulfilling the above condition |
|
# is the autocorrelation time |
|
imax = np.where(end)[0][0] |
|
nclust = nclust_cum[imax-1] |
|
else: |
|
# if there were no points, then there were not enough samples to |
|
# correctly determine the autocorrelation time for a given window len |
|
imax = -1 |
|
nclust = nclust_cum[-1] |
|
print 'WARNING: autocorr not converged.' |
|
return nclust |
|
|
|
|
|
def findautofromemcee(chain): |
|
m_walkers, nsteps, dim = chain.shape |
|
def foofn(): |
|
pass |
|
sampler = ensemble.EnsembleSampler(m_walkers, dim, foofn, a=2.0) |
|
sampler._chain = chain |
|
return sampler.acor[0] |
|
|
|
|
|
def do_single_trial(params): |
|
# pull the parameters out of the tuple passed |
|
index = params[0] |
|
nsamples = params[1] |
|
phi = params[2] |
|
# give the user something to stare at (if not running on a cluster |
|
if index % 10 == 0: |
|
print index |
|
# generate the chain |
|
chain, truelength = autoregressive(nsamples, phi) |
|
# get the transpose of the chain for the format emcee wants |
|
biechain = chain[:,:,0].T[:,:,np.newaxis] |
|
# get the emcee estimate |
|
meanfirst = findautofromemcee(chain) |
|
# get the new method autocorrelation times |
|
windowList = range(1, 31) |
|
temp = findauto(biechain, windowList) |
|
# build the output list |
|
output = [truelength, meanfirst] |
|
output.extend(temp) |
|
return output |
|
|
|
|
|
def main(ntrials, numcores, nsamples, phi, outfile): |
|
# make the 'thread pool' |
|
mp_pool = multiprocessing.Pool(processes=numcores) |
|
# generate a list of parameters to map across |
|
params = [(i, nsamples, phi) for i in range(ntrials)] |
|
# map the |
|
values = mp_pool.map(do_single_trial, params) |
|
temp = np.array(values) |
|
np.savez_compressed(outfile, data=temp) |
|
|
|
HELP_STR = """Usage: |
|
{0:s} <Num Trials> <Num Cores> <AC Time Index> <output file> |
|
AC Time Index = 0 --> Autocorrelation time = 9 |
|
AC Time Index = 1 --> Autocorrelation time = 20 |
|
AC Time Index = 2 --> Autocorrelation time = 30 |
|
AC Time Index = 3 --> Autocorrelation time = 60 |
|
AC Time Index = 4 --> Autocorrelation time = 200""" |
|
|
|
if __name__ == "__main__": |
|
if len(sys.argv) != 5: |
|
print HELP_STR.format(sys.argv[0]) |
|
sys.exit() |
|
params = CHAIN_PARAMS[int(sys.argv[3])] |
|
main(int(sys.argv[1]), int(sys.argv[2]), params[0], params[1], sys.argv[4]) |