@brandonwillard
Created May 18, 2016 20:36
Somewhat fixed MvNormal implementation
mvnormal_extension.py:

import numpy as np

import theano.tensor as T  # missing in the original gist; `logp` below uses `T`

from functools import partial

from scipy import stats

from theano import scan
from theano.tensor.nlinalg import det, matrix_inverse, trace, eigh

from pymc3 import transforms
from pymc3.distributions.distribution import Continuous, Discrete, draw_values, generate_samples
from pymc3.distributions.special import gammaln, multigammaln
from pymc3.distributions.dist_math import bound, logpow, factln


class MvNormal(Continuous):
    r"""
    This MvNormal can handle tensor arguments (to some degree).  Also, the
    sampling routine uses the correct covariance, but it's costly.
    """
    def __init__(self, mu, tau, *args, **kwargs):
        super(MvNormal, self).__init__(*args, **kwargs)
        self.mean = self.median = self.mode = self.mu = mu
        self.tau = tau

    def random(self, point=None, size=None):
        mu, tau = draw_values([self.mu, self.tau], point=point)

        def _random(mean, tau, size=None):
            supp_dim = mean.shape[-1]
            mus_collapsed = mean.reshape((-1, supp_dim))
            taus_collapsed = tau.reshape((-1, supp_dim, supp_dim))
            # FIXME: do something smarter about tau/cov
            # (np.linalg.inv inverts each matrix in the stack)
            covs_collapsed = np.linalg.inv(taus_collapsed)

            mvrvs = partial(stats.multivariate_normal.rvs, size=1)
            res = list(map(mvrvs, mus_collapsed, covs_collapsed))
            # FIXME: this is a hack; the PyMC sampling framework
            # will incorrectly set `size == Distribution.shape` when a single
            # sample is requested, implying that we want
            # `Distribution.shape`-many samples of a
            # `Distribution.shape`-sized object: too many!  That's why
            # we're ignoring `size` right now and only ever asking
            # for a single sample.
            return np.asarray(res).reshape(mean.shape)

        samples = generate_samples(_random,
                                   mean=mu, tau=tau,
                                   dist_shape=self.shape,
                                   broadcast_shape=mu.shape,
                                   size=size)
        return samples

    def logp(self, value):
        mu = T.as_tensor_variable(self.mu)
        tau = T.as_tensor_variable(self.tau)

        reps_shape_T = tau.shape[:-2]
        reps_shape_prod = T.prod(reps_shape_T, keepdims=True)
        dist_shape_T = mu.shape[-1:]

        # collapse the reps dimensions
        flat_supp_shape = T.concatenate((reps_shape_prod, dist_shape_T))
        mus_collapsed = mu.reshape(flat_supp_shape, ndim=2)
        taus_collapsed = tau.reshape(T.concatenate((reps_shape_prod,
                                                    dist_shape_T, dist_shape_T)),
                                     ndim=3)

        # force value to conform to reps_shape
        value_reshape = T.ones_like(mu) * value
        values_collapsed = value_reshape.reshape(flat_supp_shape, ndim=2)

        def single_logl(_mu, _tau, _value, k):
            # standard MVN log-density with precision matrix `_tau`:
            # -(k * log(2*pi) - log|tau| + delta' tau delta) / 2
            delta = _value - _mu
            result = k * T.log(2 * np.pi) - T.log(det(_tau))
            result += delta.dot(_tau).dot(delta)
            return -result / 2

        res, _ = scan(fn=single_logl,
                      sequences=[mus_collapsed, taus_collapsed, values_collapsed],
                      non_sequences=[dist_shape_T],
                      strict=True)
        return res.sum()
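
A minimal usage sketch of the batched ("tensor") arguments described above, assuming this file is saved as mvnormal_extension.py (the name used in the comments below); the shapes and values here are illustrative only:

import numpy as np
import pymc3 as pm
from mvnormal_extension import MvNormal

# a batch of three bivariate normals: `mu` has shape (3, 2) and `tau`
# stacks one 2x2 precision matrix per component, shape (3, 2, 2)
mus_np = np.stack([np.zeros(2), np.ones(2), 2. * np.ones(2)])
taus_np = np.tile(np.eye(2), (3, 1, 1))

with pm.Model():
    x = MvNormal('x', mu=mus_np, tau=taus_np, shape=(3, 2))
    trace = pm.sample(100)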
@AngelBerihuete

Dear @brandonwillard, I am inexperienced in PyMC3.

I'm trying to reproduce the example by @benavente in pymc-devs/pymc#1112, but I get errors when running it. I've tried the two ways you proposed: first, using your extended mvnormal_extension.py from this gist; second, using his DensityDist code with .sum() added at the end of the log-density.

  • Using your extended mvnormal_extension.py with the following code:
import numpy as np
import pymc3 as pm
import scipy
import theano
import theano.tensor as T
from mvnormal_extension import MvNormal

target_data = np.random.random((500, 16))

N_COMPONENTS = 5
N_SAMPLES, N_DIMS = target_data.shape

# Dirichlet prior.
ALPHA_0 = np.ones(N_COMPONENTS)
# Component means prior.
MU_0 = np.zeros(N_DIMS)
LAMB_0 = 2. * np.eye(N_DIMS)
# Components precision prior.
BETA_0, BETA_1 = .1, 2.    # Covariance stds prior uniform limits.
L_0 = 5.       # LKJ corr. shape. Larger shape -> more biased to identity.

# In order to convert the upper triangular correlation values to a
# complete correlation matrix, we need to construct an index matrix:
# Source: http://stackoverflow.com/q/29759789/1901296
N_ELEMS = N_DIMS * (N_DIMS - 1) // 2
tri_index = np.zeros([N_DIMS, N_DIMS], dtype=int)
tri_index[np.triu_indices(N_DIMS, k=1)] = np.arange(N_ELEMS)
tri_index[np.triu_indices(N_DIMS, k=1)[::-1]] = np.arange(N_ELEMS)
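# For illustration, with N_DIMS = 3 this yields
#   tri_index = [[0, 0, 1],
#                [0, 0, 2],
#                [1, 2, 0]]
# so corr_vec[tri_index] scatters the 3 upper-triangular correlations into
# a full symmetric matrix, and fill_diagonal later sets the diagonal to 1.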

with pm.Model() as model:
    # Component weight prior.
    pi = pm.Dirichlet('pi', ALPHA_0, testval=np.ones(N_COMPONENTS) / N_COMPONENTS)
    #pi_potential = pm.Potential('pi_potential', T.switch(T.min(pi) < .01, -np.inf, 0))
    ###################
    # Components plate.
    ###################
    # Component means.
    mus = [pm.MvNormal('mu_{}'.format(i), MU_0, LAMB_0, shape=N_DIMS)
           for i in range(N_COMPONENTS)]
    # Component precisions.
    #lamb = diag(sigma) * corr(corr_shape) * diag(sigma)
    corr_vecs = [
            pm.LKJCorr('corr_vec_{}'.format(i), L_0, N_DIMS)
            for i in range(N_COMPONENTS)
    ]
    # Transform the correlation vector representations to matrices.
    corrs = [
        T.fill_diagonal(corr_vecs[i][tri_index], 1.)
        for i in range(N_COMPONENTS)
    ]
    # Stds for the correlation matrices.
    cov_stds = pm.Uniform('cov_stds', BETA_0, BETA_1, shape=(N_COMPONENTS, N_DIMS))
    # Finally re-compose the covariance matrices using diag(sigma) * corr * diag(sigma)
    # Source http://austinrochford.com/posts/2015-09-16-mvn-pymc3-lkj.html
    lambs = []
    for i in range(N_COMPONENTS):
        std_diag = T.diag(cov_stds[i])
        cov = std_diag.dot(corrs[i]).dot(std_diag)
        lambs.append(T.nlinalg.matrix_inverse(cov))
    stacked_mus = T.stack(mus)
    stacked_lambs = T.stack(lambs)
    #####################
    # Observations plate.
    #####################
    z = pm.Categorical('z', pi, shape=N_SAMPLES)
    y = MvNormal('y', stacked_mus[z], stacked_lambs[z], observed=target_data)

getting the following error:

Applied stickbreaking-transform to pi and added transformed pi_stickbreaking to model.
Applied interval-transform to cov_stds and added transformed cov_stds_interval to model.
Traceback (most recent call last):
  File "<stdin>", line 37, in <module>
  File "/home/angel/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 25, in __new__
    return model.Var(name, dist, data)
  File "/home/angel/anaconda2/lib/python2.7/site-packages/pymc3/model.py", line 306, in Var
    var = ObservedRV(name=name, data=data, distribution=dist, model=self)
  File "/home/angel/anaconda2/lib/python2.7/site-packages/pymc3/model.py", line 581, in __init__
    self.logp_elemwiset = distribution.logp(data)
  File "mvnormal_extension.py", line 50, in logp
    def logp(self, value):
NameError: global name 'T' is not defined
  • And using the alternative DensityDist:
import numpy as np
import pymc3 as pm
import scipy
import theano
from theano import tensor

target_data = np.random.random((500, 16))

N_COMPONENTS = 5
N_SAMPLES, N_DIMS = target_data.shape

# Dirichlet prior.
ALPHA_0 = np.ones(N_COMPONENTS)
# Component means prior.
MU_0 = np.zeros(N_DIMS)
LAMB_0 = 1. * np.eye(N_DIMS)
# Components precision prior.
BETA_0, BETA_1 = 0., 1.    # Covariance stds prior uniform limits.
L_0 = 2.       # LKJ corr. shape. Larger shape -> more biased to identity.

# In order to convert the upper triangular correlation values to a
# complete correlation matrix, we need to construct an index matrix:
# Source: http://stackoverflow.com/q/29759789/1901296
N_ELEMS = N_DIMS * (N_DIMS - 1) // 2
tri_index = np.zeros([N_DIMS, N_DIMS], dtype=int)
tri_index[np.triu_indices(N_DIMS, k=1)] = np.arange(N_ELEMS)
tri_index[np.triu_indices(N_DIMS, k=1)[::-1]] = np.arange(N_ELEMS)

with pm.Model() as model:
    # Component weight prior.
    pi = pm.Dirichlet('pi', ALPHA_0, testval=np.ones(N_COMPONENTS) / N_COMPONENTS)
    #pi_potential = pm.Potential('pi_potential', tensor.switch(tensor.min(pi) < .01, -np.inf, 0))
    ###################
    # Components plate.
    ###################
    # Component means.
    mus = [pm.MvNormal('mu_{}'.format(i), MU_0, LAMB_0, shape=N_DIMS)
           for i in range(N_COMPONENTS)]
    # Component precisions.
    #lamb = diag(sigma) * corr(corr_shape) * diag(sigma)
    corr_vecs = [
            pm.LKJCorr('corr_vec_{}'.format(i), L_0, N_DIMS)
            for i in range(N_COMPONENTS)
    ]
    # Transform the correlation vector representations to matrices.
    corrs = [
        tensor.fill_diagonal(corr_vecs[i][tri_index], 1.)
        for i in range(N_COMPONENTS)
    ]
    # Stds for the correlation matrices.
    cov_stds = pm.Uniform('cov_stds', BETA_0, BETA_1, shape=(N_COMPONENTS, N_DIMS))
    # Finally re-compose the covariance matrices using diag(sigma) * corr * diag(sigma)
    # Source http://austinrochford.com/posts/2015-09-16-mvn-pymc3-lkj.html
    lambs = []
    for i in range(N_COMPONENTS):
        std_diag = tensor.diag(cov_stds[i])
        cov = std_diag.dot(corrs[i]).dot(std_diag)
        lambs.append(tensor.nlinalg.matrix_inverse(cov))
    stacked_mus = tensor.stack(mus)
    stacked_lambs = tensor.stack(lambs)
    #####################
    # Observations plate.
    #####################
    z = pm.Categorical('z', pi, shape=N_SAMPLES)
    #    
    @theano.as_op(itypes=[tensor.dmatrix, tensor.lvector, tensor.dmatrix, tensor.dtensor3],
                  otypes=[tensor.dscalar])
    def likelihood_op(values, z_values, mu_values, prec_values):
        logp = 0.
        for i in range(N_COMPONENTS):
            indices = z_values == i
            if not indices.any():
                continue
            logp += scipy.stats.multivariate_normal(
                mu_values[i], prec_values[i]).logpdf(values[indices]).sum()
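        # NOTE: `theano.as_op` above declares otypes=[tensor.dscalar], so the
        # op must return an ndarray; returning the plain Python float `logp`
        # here is a plausible cause of the "expected an ndarray" error shown
        # below (if so, `return np.asarray(logp)` may be the fix).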
        return logp
    def likelihood(values):
        return likelihood_op(values, z, stacked_mus, stacked_lambs)
    #
    y = pm.DensityDist('y', likelihood, observed=target_data)
    step1 = pm.Metropolis(vars=mus + lambs + [pi])
    step2 = pm.ElemwiseCategorical(vars=[z], values=list(range(N_COMPONENTS)))
    trace = pm.sample(10, step=[step1, step2])

getting the following error:

Applied stickbreaking-transform to pi and added transformed pi_stickbreaking to model.
Applied interval-transform to cov_stds and added transformed cov_stds_interval to model.
Traceback (most recent call last):
  File "<stdin>", line 55, in <module>
  File "/home/angel/anaconda2/lib/python2.7/site-packages/pymc3/sampling.py", line 150, in sample
    return sample_func(**sample_args)
  File "/home/angel/anaconda2/lib/python2.7/site-packages/pymc3/sampling.py", line 159, in _sample
    for i, strace in enumerate(sampling):
  File "/home/angel/anaconda2/lib/python2.7/site-packages/pymc3/sampling.py", line 241, in _iter_sample
    point = step.step(point)
  File "/home/angel/anaconda2/lib/python2.7/site-packages/pymc3/step_methods/compound.py", line 14, in step
    point = method.step(point)
  File "/home/angel/anaconda2/lib/python2.7/site-packages/pymc3/step_methods/compound.py", line 14, in step
    point = method.step(point)
  File "/home/angel/anaconda2/lib/python2.7/site-packages/pymc3/step_methods/arraystep.py", line 127, in step
    apoint = self.astep(bij.map(point))
  File "/home/angel/anaconda2/lib/python2.7/site-packages/pymc3/step_methods/metropolis.py", line 127, in astep
    q_new = metrop_select(self.delta_logp(q, q0), q, q0)
  File "/home/angel/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.py", line 871, in __call__
    storage_map=getattr(self.fn, 'storage_map', None))
  File "/home/angel/anaconda2/lib/python2.7/site-packages/theano/gof/link.py", line 314, in raise_with_op
    reraise(exc_type, exc_value, exc_trace)
  File "/home/angel/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.py", line 859, in __call__
    outputs = self.fn()
ValueError: expected an ndarray
Apply node that caused the error: Elemwise{Composite{((i0 + i1) - (i2 + i3))}}[(0, 0)](Sum{acc_dtype=float64}.0, FromFunctionOp{likelihood_op}.0, Sum{acc_dtype=float64}.0, FromFunctionOp{likelihood_op}.0)
Toposort index: 101
Inputs types: [TensorType(float64, scalar), TensorType(float64, scalar), TensorType(float64, scalar), TensorType(float64, scalar)]
Inputs shapes: [(), (), (), ()]
Inputs strides: [(), (), (), ()]
Inputs values: [array(-132.3236983886609), -12834.715596373449, array(-110.90354888959129), -13233.586732459016]
Outputs clients: [['output']]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

Any suggestions will be welcome!
Thanks
Angel

@AngelBerihuete commented Jul 20, 2016

Hi @brandonwillard
The problem above was fixed, but I have a doubt about your function. Coding a simple model like this:

import numpy as np
import pymc3 as pm
from mvnormal_extension  import MvNormal

with pm.Model() as model:
    var_x = MvNormal('var_x', mu=3*np.zeros(2), tau=np.diag(np.ones(2)), shape=2)
    trace = pm.sample(100)

pm.summary(trace)

I obtain these results:

var_x:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------

  0.220            1.161            0.116            [-1.897, 2.245]
  0.165            1.024            0.102            [-2.626, 1.948]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|

  -1.897         -0.761         0.486          1.112          2.245
  -2.295         -0.426         0.178          0.681          2.634

which, of course, is not correct, because the mean should be near (3, 3).

Any clue why this is happening?
Thanks
Angel

@AngelBerihuete

Forget the last message.

Of course the means are near (0, 0), since MvNormal has

mu = 3*np.zeros(2)

It was my fault; I was thinking of

mu = 3*np.ones(2)
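
i.e., the model I actually had in mind was:

var_x = MvNormal('var_x', mu=3*np.ones(2), tau=np.diag(np.ones(2)), shape=2)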

@lezorich

Hi @AngelBerihuete, how did you fix the DensityDist alternative? I'm getting the same exception as you.

Thanks!
Lukas.

@AngelBerihuete
Copy link

Hi @lezorich, I'm so sorry for the huge delay.
In the end I used the mvnormal_extension.py file coded by @brandonwillard.
Did you fix the DensityDist alternative?
Angel
