Last active
October 17, 2022 15:05
-
-
Save stratisMarkou/976e40272ed72e9b832148a29f719e2a to your computer and use it in GitHub Desktop.
CDF of a Multivariate Normal
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
An implementation of Genz's numerical routine for evaluating the integral of a | |
multivariate Gaussian within a hypercube volume as specified in: | |
https://www.math.wsu.edu/faculty/genz/papers/mvn.pdf | |
""" | |
from typing import * | |
import tensorflow as tf | |
import tensorflow_probability as tfp | |
# ============================================================================= | |
# Two small helpers | |
# ============================================================================= | |
def update_add( | |
tensor: tf.Tensor, | |
index: List[int], | |
update: tf.Tensor, | |
): | |
index = tf.convert_to_tensor([index]) | |
update = tf.convert_to_tensor([update]) | |
index = tf.convert_to_tensor(index) | |
updated = tf.tensor_scatter_nd_add( | |
tensor=tensor, | |
indices=index, | |
updates=update, | |
) | |
return updated | |
def vector_dot(a: tf.Tensor, b: tf.Tensor): | |
return tf.reduce_sum(a*b) | |
# ============================================================================= | |
# Genz MC estimator | |
# ============================================================================= | |
@tf.function | |
def single_sample_cdf( | |
mean: tf.Tensor, | |
cov: tf.Tensor, | |
lower: tf.Tensor, | |
upper: tf.Tensor, | |
samples: tf.Tensor, | |
): | |
# Identify data type to use for all calculations | |
dtype = mean.dtype | |
# Dimension of the integral | |
D = mean.shape[-1] | |
# Rename samples and limits for brevity | |
w = samples | |
a = lower - mean | |
b = upper - mean | |
# Compute cholesky of the covariance matrix | |
C = tf.linalg.cholesky(cov) | |
# Initialise transformation variables | |
d = tf.zeros_like(mean) | |
e = tf.zeros_like(mean) | |
f = tf.zeros_like(mean) | |
y = tf.zeros_like(mean) | |
# Initialise standard normal for computing CDFs | |
normal = tfp.distributions.Normal( | |
loc=tf.zeros(shape=(), dtype=dtype), | |
scale=tf.ones(shape=(), dtype=dtype), | |
) | |
Phi = lambda x: normal.cdf(x) | |
iPhi = lambda x: normal.quantile(x) | |
# Compute transformation variables at the first step | |
d = update_add(d, [0], Phi(a[0] / C[0, 0])) | |
e = update_add(e, [0], Phi(b[0] / C[0, 0])) | |
f = update_add(f, [0], e[0] - d[0]) | |
for i in tf.range(1, D): | |
# Update y[i-1] | |
y = update_add(y, [i-1], iPhi(d[i-1] + w[i-1] * (e[i-1] - d[i-1]))) | |
# Update d[i-1] and e[i-1] | |
d = update_add( | |
d, [i], Phi((a[i] - vector_dot(C[i, :i], y[:i])) / C[i, i]) | |
) | |
e = update_add( | |
e, [i], Phi((b[i] - vector_dot(C[i, :i], y[:i])) / C[i, i]) | |
) | |
f = update_add(f, [i], (e[i] - d[i]) * f[i-1]) | |
return f[-1] | |
# ============================================================================= | |
# Running the estimator on a toy problem | |
# ============================================================================= | |
# Set random seed | |
tf.random.set_seed(0) | |
# Set default data type | |
dtype = tf.float32 | |
# Number of samples and dimension of Gaussian | |
S = 10 | |
D = 5 | |
# Set up the mean and covariance | |
mean = tf.zeros(shape=(D,), dtype=dtype) | |
rand = tf.random.uniform((D, D), dtype=dtype) | |
cov = tf.matmul(rand, rand, transpose_b=True) + tf.eye(D, dtype=dtype) | |
# Set up lower and upper integration limits | |
lower = - tf.random.uniform((D,), dtype=dtype) | |
upper = tf.random.uniform((D,), dtype=dtype) | |
# Draw samples to use within the MC estiamate | |
samples = tf.random.uniform(shape=(S, D)) | |
# Equally well, you can use Sobol sequences by replacing the line above with | |
# samples = tf.math.sobol_sample(dim=D, num_results=S) | |
# Create a lambda for convenience, use tf.map_fn for speed | |
_single_sample_cdf = lambda x: single_sample_cdf(mean, cov, lower, upper, x) | |
# Compute MC estimates of truncated multivariate Gaussian integral | |
cdf = tf.map_fn(_single_sample_cdf, samples) | |
print( | |
f"CDF: {tf.reduce_mean(cdf):.6f} +/- " | |
f"{2*tf.math.reduce_std(cdf)/S**0.5:.6f}" | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment