Logic and Tests for Automatic Distribution Shape Determination.
import collections
import warnings

from itertools import cycle

import numpy as np
import theano.tensor as tt

from theano.tensor.raw_random import _infer_ndim_bcast

class DistributionShape(object):
r"""Class for handling the shape information of distributions.
    Distributions are specified in terms of the shape of their support, the
    shape of the space of independent instances, and the shape of the space
    of replications. The "total" shape of the distribution is the
    concatenation of these spaces, i.e.
    `dist.shape = shape_reps + shape_ind + shape_supp`.

    We're able to specify the number of support dimensions for a concrete
    distribution type (e.g. scalar distributions have `ndim_supp=0` and a
    multivariate normal has vector support, so `ndim_supp=1`) while leaving
    the exact size of each dimension symbolic. To actually instantiate a
    distribution, we at least need a list/tuple/vector of length `ndim_supp`
    containing symbolic scalars, `shape_supp`, giving the exact size of each
    dimension. When `shape_supp` has an unknown length at graph-building
    time (e.g. it is a generic Theano vector or tensor), we must provide
    `ndim_supp`.
    The symbolic scalars `shape_supp` must either be provided to a
    distribution's constructor or inferred from its required parameters.
    Since most distributions are either scalar or have parameters within the
    space of their support (e.g. the multivariate normal's mean parameter),
    inference can be straightforward. In the latter case, we refer to the
    parameters as "informative".
    We also attempt to handle the specification of a collection of
    independent, but not identical, instances of the base distribution (each
    with support as above). This space has shape `shape_ind`. Generally,
    `shape_ind` will be implicitly given by the distribution's parameters.
    For instance, if a multivariate normal distribution is instantiated with
    a matrix mean parameter `mu`, we can assume that each row specifies the
    mean of an independent distribution. In this case the covariance
    parameter would have to be either an `ndim=3` tensor, in which the last
    two dimensions specify each covariance matrix, or a single matrix that
    applies to each independent variate implied by the matrix `mu`.
    Here are a few ways shapes are inferred:

    * When a distribution is scalar, `shape_supp = ()`; if it also has an
      informative parameter, e.g. `mu`, then `shape_ind = tt.shape(mu)`.
    * When a distribution is multivariate and has an informative parameter,
      e.g. `mu`, then `shape_supp = tt.shape(mu)[-ndim_supp:]` and
      `shape_ind = tt.shape(mu)[:-ndim_supp]`.

    In all remaining cases the shapes must be provided by the caller.
`shape_reps` is always provided by the caller.
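
    For example (an illustrative sketch; the 2x3 `mu` below is a
    hypothetical matrix parameter of some vector-support distribution):

        mu = tt.as_tensor_variable(np.arange(6.).reshape(2, 3))
        ds = DistributionShape.infer_shapes(
            (mu,), ndim_supp=1, ndims_params=(1,), size=(5,))
        # ds.shape_supp evaluates to (3,), ds.shape_ind to (2,),
        # ds.shape_reps to (5,), and ds.shape to (5, 2, 3).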
    Members
    -------
    shape_supp: tuple
        Shape of the support of this distribution type.
    shape_ind: tuple
        Shape of the space of independent, but not necessarily identical,
        copies of this distribution type.
    shape_reps: tuple
        Shape of the space of independent and identical copies
        (replications) of this distribution type.
"""
def __init__(self, shape_supp=None, shape_ind=None, shape_reps=None, **kwargs):
r"""
Parameters
----------
        shape_supp: tuple
            Shape of the support of this distribution type.
        shape_ind: tuple
            Shape of the space of independent, but not necessarily
            identical, copies of this distribution type.
        shape_reps: tuple
            Shape of the space of independent and identical copies
            (replications) of this distribution type.
"""
self.bcast = None
if (shape_supp is None) or (shape_ind is None) or (shape_reps is None):
# If we get only the old `shape` parameter, assume it's only
# specifying replications.
old_shape = kwargs.get('shape', None)
if old_shape is not None:
warnings.warn(('The `shape` parameter is deprecated; use `size` to'
' specify the shape and number of replications.'),
DeprecationWarning)
                self.shape_supp = self.as_tensor_shape_variable(tuple())
                self.ndim_supp = 0
                self.shape_ind = self.as_tensor_shape_variable(tuple())
                self.ndim_ind = 0
                self.shape_reps = self.as_tensor_shape_variable(old_shape)
                self.ndim_reps = tt.get_vector_length(self.shape_reps)
                # `self.bcast` starts out as `None`, so build the tuple
                # directly (`+=` on `None` would raise a `TypeError`).
                self.bcast = tuple(s_ == 1 for s_ in old_shape)
else:
                raise ValueError("Either all of `shape_supp`, `shape_ind`,"
                                 " and `shape_reps` or the deprecated"
                                 " `shape` parameter must be specified.")
else:
self.shape_supp = self.as_tensor_shape_variable(shape_supp)
self.ndim_supp = tt.get_vector_length(self.shape_supp)
self.shape_ind = self.as_tensor_shape_variable(shape_ind)
self.ndim_ind = tt.get_vector_length(self.shape_ind)
self.shape_reps = self.as_tensor_shape_variable(shape_reps)
self.ndim_reps = tt.get_vector_length(self.shape_reps)
ndim_sum = self.ndim_supp + self.ndim_ind + self.ndim_reps
if ndim_sum == 0:
            self.shape = tt.constant([], dtype='int64')
else:
            self.shape = (tuple(self.shape_reps) +
                          tuple(self.shape_ind) +
                          tuple(self.shape_supp))
self.shape = tt.as_tensor_variable(self.shape)
if has_const_inputs(self.shape):
# FIXME: This feels like a hack. Seems like it would be better to
# evaluate somewhere else (e.g. exactly where a value is needed).
self.shape = self.shape.eval()
self.ndim = tt.get_vector_length(self.shape)
if self.bcast is None:
# FIXME: Determine actual broadcast dimensions.
self.bcast = [False] * self.ndim
@staticmethod
def as_tensor_shape_variable(var):
r"""Convert a shape variable into a suitable Theano shape variable.
Mostly copied from `_infer_ndim_bcast`.
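
        For example (illustrative; both calls return an integer vector):

            as_tensor_shape_variable((2, 3))  # vector [2, 3]
            as_tensor_shape_variable(None)    # empty int64 vector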
"""
if var is None:
return tt.constant([], dtype='int64')
res = var
if isinstance(res, (tuple, list)):
if len(res) == 0:
return tt.constant([], dtype='int64')
res = tt.as_tensor_variable(res, ndim=1)
else:
if res.ndim != 1:
raise TypeError("shape must be a vector or list of scalar, got\
'%s'" % res)
        if not (res.dtype.startswith('int') or res.dtype.startswith('uint')):
            raise TypeError('shape must be an integer vector or list',
                            res.dtype)
return res
@classmethod
def infer_shapes(cls, dist_params, ndim_supp,
ndims_params=None, size=None, shape=None,
**kwargs):
r"""This method attempts to automatically determine shape information
for a distribution (see the description in DistributionShape).
Parameters
----------
dist_params: tuple
            A tuple containing the distribution's parameters.
ndim_supp: int
Dimension of the support. This value is used to infer the exact
shape of the support and independent terms from ``dist_params``.
ndims_params: tuple (int)
Number of dimensions for each parameter in ``dist_params``
for a single variate. Used to determine the shape of the
independent variate space. If `None`, we assume all parameters
are scalars.
size: tuple (int)
Shape of replications.
shape: tuple (int)
Deprecated; use ``size``.
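
        For instance (a sketch mirroring the tests below): with a single
        parameter of shape `(3, 2, 2)` declared via `ndims_params=(2,)`,
        the last two dimensions belong to one variate, so the implied
        independent-variate space is `(3,)`:

            taus = tt.as_tensor_variable(np.arange(12.).reshape(3, 2, 2))
            ds = DistributionShape.infer_shapes(
                (taus,), ndim_supp=1, ndims_params=(2,))
            # ds.shape_ind evaluates to (3,) and ds.shape_supp to (2,).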
"""
if shape is not None:
warnings.warn(('The `shape` parameter is deprecated; use `size` to'
' specify the shape and number of replications.'),
DeprecationWarning)
if size is None:
size = shape
dist_params = tuple(tt.as_tensor_variable(x) for x in dist_params)
        # Parameters need to match in shape (i.e. broadcast together); we
        # use the following to determine the ultimate shape of the
        # independent terms.
if ndims_params is None:
# We assume the parameters are scalar.
infer_params = ((None, None) + dist_params)
ndim_ind, shape_ind, bcast = _infer_ndim_bcast(*infer_params)
else:
# We know the number of dimensions for each param (of one variate),
# so we can "drop" those known dimensions (by creating a dummy
# variable without them) and find the shape implied by broadcasting
# the reduced dimension params.
# XXX: The use of `tuple` on the shape object avoids issues in
# `theano.tensor.alloc` with `theano.tensor.get_vector_length`
# applied to the Subtensor that arises due to indexing.
dummy_params = tuple(p if n == 0 else tt.zeros(tuple(p.shape)[:-n])
for p, n in zip(dist_params,
cycle(ndims_params)))
infer_params = ((None, None) + dummy_params)
ndim_ind, shape_ind, bcast = _infer_ndim_bcast(*infer_params)
if ndim_supp <= 0:
shape_supp = tuple()
else:
# TODO: Would be good to check that other params match
# in support dimension.
ref_shape = tt.shape(dist_params[0])
shape_supp = ref_shape[-ndim_supp:]
# shape_ind = shape_ind[:-ndim_supp]
# ndim_ind -= ndim_supp
# We have to be careful with `as_tensor_variable`; it will produce
# empty collections with dtype=floatX, which violates our expectations
# for a shape object.
        if size is None or np.size(size) == 0:
            shape_reps = np.array((), dtype=np.int64)
        elif np.shape(size) == ():
            shape_reps = np.asarray((size,), dtype=np.int64)
        else:
            shape_reps = np.asarray(size, dtype=np.int64)
# TODO: We could use the bcast info we've obtained thus far.
# Add broadcast info from replication dimensions.
# bcast = tuple(True if s_ == 1 else False
# for s_ in shape_reps) + bcast
shape_reps = tt.as_tensor_variable(shape_reps, ndim=1)
return cls(shape_supp, shape_ind, shape_reps, **kwargs)
def test_distribution_shape():
def evaled_map(d):
return {k: v if not hasattr(v, 'eval') else v.eval()
for k, v in d.items()}
# Scalar support only
test_params = (tt.as_tensor_variable(0),)
test_shape = DistributionShape.infer_shapes(test_params, ndim_supp=0)
test_res = evaled_map(test_shape.__dict__)
np.testing.assert_array_equal(test_res['bcast'], ())
np.testing.assert_array_equal(test_res['shape'], ())
np.testing.assert_array_equal(test_res['shape_ind'], ())
np.testing.assert_array_equal(test_res['shape_reps'], ())
np.testing.assert_array_equal(test_res['shape_supp'], ())
# Scalar support with independent variates
test_params = (tt.as_tensor_variable([[0, 1], [2, 3]]),)
test_shape = DistributionShape.infer_shapes(test_params, ndim_supp=0)
test_res = evaled_map(test_shape.__dict__)
np.testing.assert_array_equal(test_res['bcast'], (False, False))
np.testing.assert_array_equal(test_res['shape'], (2, 2))
np.testing.assert_array_equal(test_res['shape_ind'], (2, 2))
np.testing.assert_array_equal(test_res['shape_reps'], ())
np.testing.assert_array_equal(test_res['shape_supp'], ())
# Scalar support with independent and replicated variates
test_params = (tt.as_tensor_variable([[0, 1], [2, 3]]),)
test_shape = DistributionShape.infer_shapes(test_params, ndim_supp=0,
size=(3, 2))
test_res = evaled_map(test_shape.__dict__)
np.testing.assert_array_equal(test_res['bcast'], [False]*4)
np.testing.assert_array_equal(test_res['shape'], (3, 2, 2, 2))
np.testing.assert_array_equal(test_res['shape_ind'], (2, 2))
np.testing.assert_array_equal(test_res['shape_reps'], (3, 2))
np.testing.assert_array_equal(test_res['shape_supp'], ())
# Scalar support, independent and replicated variates, and
# broadcasted parameters
test_params = (tt.as_tensor_variable([[0, 1], [2, 3]]),
tt.as_tensor_variable([0]))
for t_ in (test_params, list(reversed(test_params))):
test_shape = DistributionShape.infer_shapes(t_, ndim_supp=0,
size=(3, 2))
test_res = evaled_map(test_shape.__dict__)
np.testing.assert_array_equal(test_res['bcast'], [False]*4)
np.testing.assert_array_equal(test_res['shape'], (3, 2, 2, 2))
np.testing.assert_array_equal(test_res['shape_ind'], (2, 2))
np.testing.assert_array_equal(test_res['shape_reps'], (3, 2))
np.testing.assert_array_equal(test_res['shape_supp'], ())
# Scalar support, independent and replicated variates,
# broadcasted parameters, and specified param dimensions
test_ndims_params = (2, 1)
test_params = (tt.as_tensor_variable([[0, 1], [2, 3]]),
tt.as_tensor_variable([-1, -2]))
test_shape = DistributionShape.infer_shapes(
test_params, ndim_supp=0, ndims_params=test_ndims_params,
size=(3, 2))
test_res = evaled_map(test_shape.__dict__)
np.testing.assert_array_equal(test_res['bcast'], [False]*2)
np.testing.assert_array_equal(test_res['shape'], (3, 2))
np.testing.assert_array_equal(test_res['shape_ind'], ())
np.testing.assert_array_equal(test_res['shape_reps'], (3, 2))
np.testing.assert_array_equal(test_res['shape_supp'], ())
    # Here, independent variates are implied by a non-scalar second
    # parameter that is declared scalar (`ndim=0`) per variate, while the
    # first parameter is declared 2D and matches that declaration exactly.
test_ndims_params = (2, 0)
test_params = (tt.as_tensor_variable([[0, 1], [2, 3]]),
tt.as_tensor_variable([-1, -2]))
test_shape = DistributionShape.infer_shapes(
test_params, ndim_supp=0, ndims_params=test_ndims_params,
size=(3, 2))
test_res = evaled_map(test_shape.__dict__)
np.testing.assert_array_equal(test_res['bcast'], [False]*3)
np.testing.assert_array_equal(test_res['shape'], (3, 2, 2))
np.testing.assert_array_equal(test_res['shape_ind'], (2,))
np.testing.assert_array_equal(test_res['shape_reps'], (3, 2))
np.testing.assert_array_equal(test_res['shape_supp'], ())
# A vector support with replications and parameter broadcasting.
# This case covers a MvNormal given a single vector mean and two
# covariance/tau matrices (and replicated).
test_ndims_params = (2, 1)
test_params = (tt.as_tensor_variable([[[0, 1], [2, 3]],
[[4, 5], [6, 7]],
[[8, 9], [10, 11]]
]),
tt.as_tensor_variable([-1, -2]))
test_shape = DistributionShape.infer_shapes(
test_params, ndim_supp=1, ndims_params=test_ndims_params,
size=(5, 4))
test_res = evaled_map(test_shape.__dict__)
np.testing.assert_array_equal(test_res['bcast'], [False]*4)
np.testing.assert_array_equal(test_res['shape'], (5, 4, 3, 2))
np.testing.assert_array_equal(test_res['shape_ind'], (3,))
np.testing.assert_array_equal(test_res['shape_reps'], (5, 4))
np.testing.assert_array_equal(test_res['shape_supp'], (2,))
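    # The deprecated `shape` keyword (a sketch of the compatibility path in
    # `DistributionShape.__init__`): everything is treated as replications.
    test_shape = DistributionShape(shape=(3, 2))
    test_res = evaled_map(test_shape.__dict__)
    np.testing.assert_array_equal(test_res['shape'], (3, 2))
    np.testing.assert_array_equal(test_res['shape_ind'], ())
    np.testing.assert_array_equal(test_res['shape_reps'], (3, 2))
    np.testing.assert_array_equal(test_res['shape_supp'], ())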
def has_const_inputs(nodes):
r"""Checks that nodes have only constant inputs for
their Ops. Useful for justifying one-time evals.
"""
if not isinstance(nodes, collections.Iterable):
nodes = [nodes]
for node in nodes:
owner = getattr(node, 'owner', None)
if owner is not None:
if not has_const_inputs(owner.inputs):
return False
elif not isinstance(node, tt.Constant):
return False
return True
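
# An illustrative sanity check for `has_const_inputs` (as comments, to keep
# the module importable): a graph built purely from constants can be
# evaluated once at build time, while one with free variables cannot.
#
#     has_const_inputs(tt.as_tensor_variable([2, 3]) * 2)  # -> True
#     has_const_inputs(tt.vector('x') * 2)                 # -> False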