brentp/gen-correlated.py

## gen-correlated.py
# generate correlated random numbers in python
# after: http://code.activestate.com/recipes/576512-generating-correlated-random-numbers/

from scipy.stats import norm
import numpy as np
chol = np.linalg.cholesky

qnorm = norm.ppf
pnorm = norm.cdf

def gen_correlated(sigma, n, observed=None):
    """
    generate correlated random numbers given the
    partial autocorrelation matrix sigma
    """
    C = np.matrix(chol(sigma))
    if observed is None:
        X = np.random.uniform(0, 1, size=(n, sigma.shape[0]))
    else:
        assert n * sigma.shape[0] < observed.shape[0]
        idxs = np.random.random_integers(0, len(observed) - 1,
                                         size=sigma.shape[0] * n)
        X = observed[idxs].reshape((n, sigma.shape[0]))

    Q = np.matrix(qnorm(X))
    for row in  np.array(pnorm((Q * C).T)).T:
        yield row

if __name__ == "__main__":

    ys, xs = np.mgrid[:75, :75]
    sigma = 0.7 ** abs(ys - xs)
    print "lag-1\tlag-2\tlag-3"
    for row in gen_correlated(sigma, 15):
        print "%.3f\t%.3f\t%.3f" % (np.corrcoef(row[:-1], row[1:])[0, 1], \
              np.corrcoef(row[:-2], row[2:])[0, 1], \
              np.corrcoef(row[:-3], row[3:])[0, 1])

## gistfile1.r
# generate autocorrelated data.
nLags = 75 # number of lags (size of region)
# fake, uncorrelated observations
X = rnorm(nLags)

###############################################
# fake sigma... correlated decreases distance.
sigma = diag(nLags)
corr = 0.7
sigma <- corr ^ abs(row(sigma)-col(sigma))

###############################################

# Y is autocorrelated...
Y <- t(X %*% chol(sigma))

## zaykin-truncated.py
import numpy as np
from scipy.stats import norm
from numpy.linalg import cholesky as chol
qnorm = norm.ppf
pnorm = norm.cdf

def gen_correlated(sigma, n):
    """
    generate autocorrelated data according to the matrix
    sigma
    """
    sigma = np.asmatrix(sigma)
    X = np.random.uniform(0, 1, size=(sigma.shape[0], n))
    return pnorm(chol(sigma) * qnorm(X))

def calc_w(ps, truncate_at=0.05):
    # product of ps that are less than truncate_at
    # avoid underflow by taking log.
    return np.exp(np.sum(np.log(ps[ps <= truncate_at])))

def sim(sigma, ps, nsims):
    Y = gen_correlated(sigma, nsims)
    w0 = calc_w(ps)
    return sum(calc_w(row) <= w0 for row in Y.T) / float(nsims)

if __name__ == "__main__":
    ps = np.array([0.01, 0.2, 0.04, 0.1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.05])
    ps = ps[:4]
    sigma = np.eye(ps.shape[0]).astype('f')
    sigma[0,1] = sigma[1, 0] = 0.9
    sigma[0,2] = sigma[2, 0] = 0.85
    sigma[0,3] = sigma[3, 0] = 0.7
    sigma[1,2] = sigma[2, 1] = 0.8
    sigma[1,3] = sigma[3, 1] = 0.6
    sigma[2,3] = sigma[3, 2] = 0.4
    print sigma

    print sim(sigma, ps, 10000)
	# generate correlated random numbers in python
	# after: http://code.activestate.com/recipes/576512-generating-correlated-random-numbers/

	from scipy.stats import norm
	import numpy as np
	chol = np.linalg.cholesky

	qnorm = norm.ppf
	pnorm = norm.cdf

	def gen_correlated(sigma, n, observed=None):
	"""
	generate correlated random numbers given the
	partial autocorrelation matrix sigma
	"""
	C = np.matrix(chol(sigma))
	if observed is None:
	X = np.random.uniform(0, 1, size=(n, sigma.shape[0]))
	else:
	assert n * sigma.shape[0] < observed.shape[0]
	idxs = np.random.random_integers(0, len(observed) - 1,
	size=sigma.shape[0] * n)
	X = observed[idxs].reshape((n, sigma.shape[0]))

	Q = np.matrix(qnorm(X))
	for row in np.array(pnorm((Q * C).T)).T:
	yield row

	if __name__ == "__main__":

	ys, xs = np.mgrid[:75, :75]
	sigma = 0.7 ** abs(ys - xs)
	print "lag-1\tlag-2\tlag-3"
	for row in gen_correlated(sigma, 15):
	print "%.3f\t%.3f\t%.3f" % (np.corrcoef(row[:-1], row[1:])[0, 1], \
	np.corrcoef(row[:-2], row[2:])[0, 1], \
	np.corrcoef(row[:-3], row[3:])[0, 1])
	# generate autocorrelated data.
	nLags = 75 # number of lags (size of region)
	# fake, uncorrelated observations
	X = rnorm(nLags)

	###############################################
	# fake sigma... correlated decreases distance.
	sigma = diag(nLags)
	corr = 0.7
	sigma <- corr ^ abs(row(sigma)-col(sigma))

	###############################################

	# Y is autocorrelated...
	Y <- t(X %*% chol(sigma))
	import numpy as np
	from scipy.stats import norm
	from numpy.linalg import cholesky as chol
	qnorm = norm.ppf
	pnorm = norm.cdf

	def gen_correlated(sigma, n):
	"""
	generate autocorrelated data according to the matrix
	sigma
	"""
	sigma = np.asmatrix(sigma)
	X = np.random.uniform(0, 1, size=(sigma.shape[0], n))
	return pnorm(chol(sigma) * qnorm(X))

	def calc_w(ps, truncate_at=0.05):
	# product of ps that are less than truncate_at
	# avoid underflow by taking log.
	return np.exp(np.sum(np.log(ps[ps <= truncate_at])))

	def sim(sigma, ps, nsims):
	Y = gen_correlated(sigma, nsims)
	w0 = calc_w(ps)
	return sum(calc_w(row) <= w0 for row in Y.T) / float(nsims)

	if __name__ == "__main__":
	ps = np.array([0.01, 0.2, 0.04, 0.1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.05])
	ps = ps[:4]
	sigma = np.eye(ps.shape[0]).astype('f')
	sigma[0,1] = sigma[1, 0] = 0.9
	sigma[0,2] = sigma[2, 0] = 0.85
	sigma[0,3] = sigma[3, 0] = 0.7
	sigma[1,2] = sigma[2, 1] = 0.8
	sigma[1,3] = sigma[3, 1] = 0.6
	sigma[2,3] = sigma[3, 2] = 0.4
	print sigma

	print sim(sigma, ps, 10000)