Last active
January 5, 2017 02:35
-
-
Save brandonwillard/d4ec9f9f2753d1d9ef1aab71799860f7 to your computer and use it in GitHub Desktop.
Logic and Tests for Automatic Distribution Shape Determination.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections
import collections.abc
import warnings
from itertools import cycle

import numpy as np
import theano.tensor as tt
from theano.tensor.raw_random import _infer_ndim_bcast
class DistributionShape(object):
    r"""Class for handling the shape information of distributions.

    Distributions are specified in terms of the shape of their support, the
    shape of the space of independent instances and the shape of the space of
    replications.  The "total" shape of the distribution is the concatenation
    of each of these spaces, i.e.
    `dist.shape = shape_reps + shape_ind + shape_supp`.

    We're able to specify the number of dimensions for the support of a
    concrete distribution type (e.g. scalar distributions have `ndim_supp=0`
    and a multivariate normal has vector support, so `ndim_supp=1`) and have
    the exact sizes of each dimension symbolic.  To actually instantiate a
    distribution, we at least need a list/tuple/vector (`ndim_supp` in
    length) containing symbolic scalars, `shape_supp`, representing the
    exact size of each dimension.  In the case that `shape_supp` has an
    unknown length at the graph building stage (e.g. it is a generic Theano
    vector or tensor), we must provide `ndim_supp`.

    The symbolic scalars `shape_supp` must either be required by a
    distribution's constructor, or inferred through its required
    parameters.  Since most distributions are either scalar, or
    have parameters within the space of their support (e.g. the
    multivariate normal's mean parameter) inference can be
    straight-forward.  In the latter case, we refer to the parameters
    as "informative".

    We also attempt to handle the specification of a collections of
    independent--but not identical instances of the base distribution (each
    with support as above).  These have a space with shape `shape_ind`.
    Generally, `shape_ind` will be implicitly given by the distribution's
    parameters.  For instance, if a multivariate normal distribution is
    instantiated with a matrix mean parameter `mu`, we can assume that each
    row specifies the mean for an independent distribution.  In this case the
    covariance parameter would either have to be an `ndim=3` tensor, for
    which the last two dimensions specify each covariance matrix, or a single
    matrix that is to apply to each independent variate implied by the matrix
    `mu`.

    Here are a few ways of inferring shapes:
      * When a distribution is scalar, then `shape_supp = ()`
        * and has an informative parameter, e.g. `mu`, then
          `shape_ind = tt.shape(mu)`.
      * When a distribution is multivariate
        * and has an informative parameter, e.g. `mu`, then
          `shape_supp = tt.shape(mu)[-ndim_supp:]` and
          `shape_ind = tt.shape(mu)[:-ndim_supp]`.

    In all remaining cases the shapes must be provided by the caller.
    `shape_reps` is always provided by the caller.

    Members
    ----------
    shape_supp
        tuple
        Shape of the support for this distribution type.
    shape_ind
        tuple
        Dimension of independent, but not necessarily identical, copies of
        this distribution type.
    shape_reps
        tuple
        Dimension of independent and identical copies of
        this distribution type.
    """

    def __init__(self, shape_supp=None, shape_ind=None, shape_reps=None,
                 **kwargs):
        r"""
        Parameters
        ----------
        shape_supp
            tuple
            Shape of the support for this distribution type.
        shape_ind
            tuple
            Dimension of independent, but not necessarily identical, copies of
            this distribution type.
        shape_reps
            tuple
            Dimension of independent and identical copies of
            this distribution type.
        """
        self.bcast = None

        if (shape_supp is None) or (shape_ind is None) or (shape_reps is None):
            # If we get only the old `shape` parameter, assume it's only
            # specifying replications.
            old_shape = kwargs.get('shape', None)
            if old_shape is None:
                raise ValueError("shapes and bcast must be specified.")

            warnings.warn(('The `shape` parameter is deprecated; use `size` to'
                           ' specify the shape and number of replications.'),
                          DeprecationWarning)

            # Use the same tensor form as the non-deprecated branch so that
            # `tt.get_vector_length` below works uniformly.
            self.shape_supp = self.as_tensor_shape_variable(None)
            self.shape_ind = self.as_tensor_shape_variable(None)
            self.shape_reps = self.as_tensor_shape_variable(old_shape)
            # FIX: the original did `self.bcast += ...` while `self.bcast`
            # was still `None`, which raised a `TypeError` on this path.
            self.bcast = tuple(True if s_ == 1 else False
                               for s_ in old_shape)
        else:
            self.shape_supp = self.as_tensor_shape_variable(shape_supp)
            self.shape_ind = self.as_tensor_shape_variable(shape_ind)
            self.shape_reps = self.as_tensor_shape_variable(shape_reps)

        # FIX: these were previously assigned only in the non-deprecated
        # branch, so the deprecated `shape` path crashed below with an
        # `AttributeError` when computing `ndim_sum`.
        self.ndim_supp = tt.get_vector_length(self.shape_supp)
        self.ndim_ind = tt.get_vector_length(self.shape_ind)
        self.ndim_reps = tt.get_vector_length(self.shape_reps)

        ndim_sum = self.ndim_supp + self.ndim_ind + self.ndim_reps
        if ndim_sum == 0:
            self.shape = tt.constant([], dtype='int32')
        else:
            self.shape = (tuple(self.shape_reps) +
                          tuple(self.shape_ind) +
                          tuple(self.shape_supp))
            self.shape = tt.as_tensor_variable(self.shape)

        if has_const_inputs(self.shape):
            # FIXME: This feels like a hack. Seems like it would be better to
            # evaluate somewhere else (e.g. exactly where a value is needed).
            self.shape = self.shape.eval()

        self.ndim = tt.get_vector_length(self.shape)

        if self.bcast is None:
            # FIXME: Determine actual broadcast dimensions.
            self.bcast = [False] * self.ndim

    @staticmethod
    def as_tensor_shape_variable(var):
        r"""Convert a shape variable into a suitable Theano shape variable.

        Mostly copied from `_infer_ndim_bcast`.

        Parameters
        ----------
        var: None, tuple, list, or Theano vector
            The shape specification; `None` and empty collections map to an
            empty `int64` constant vector.

        Returns
        -------
        A one-dimensional integer Theano variable.
        """
        if var is None:
            return tt.constant([], dtype='int64')

        res = var
        if isinstance(res, (tuple, list)):
            if len(res) == 0:
                return tt.constant([], dtype='int64')
            res = tt.as_tensor_variable(res, ndim=1)
        else:
            if res.ndim != 1:
                # FIX: the original message used a backslash line-continuation
                # *inside* the string literal, embedding a run of stray
                # whitespace into the error text.
                raise TypeError("shape must be a vector or list of scalar,"
                                " got '%s'" % res)

        if not (res.dtype.startswith('int') or res.dtype.startswith('uint')):
            raise TypeError('shape must be an integer vector or list',
                            res.dtype)

        return res

    @classmethod
    def infer_shapes(cls, dist_params, ndim_supp,
                     ndims_params=None, size=None, shape=None,
                     **kwargs):
        r"""Attempt to automatically determine shape information for a
        distribution (see the description in `DistributionShape`).

        Parameters
        ----------
        dist_params: tuple
            A tuple containing the distribution's parameters.
        ndim_supp: int
            Dimension of the support.  This value is used to infer the exact
            shape of the support and independent terms from ``dist_params``.
        ndims_params: tuple (int)
            Number of dimensions for each parameter in ``dist_params``
            for a single variate.  Used to determine the shape of the
            independent variate space.  If `None`, we assume all parameters
            are scalars.
        size: tuple (int)
            Shape of replications.
        shape: tuple (int)
            Deprecated; use ``size``.

        Returns
        -------
        A `DistributionShape` instance (or subclass, per `cls`).
        """
        if shape is not None:
            warnings.warn(('The `shape` parameter is deprecated; use `size` to'
                           ' specify the shape and number of replications.'),
                          DeprecationWarning)
            if size is None:
                size = shape

        dist_params = tuple(tt.as_tensor_variable(x) for x in dist_params)

        # Parameters need to match in shape (broadcast together); we use the
        # following to determine what the [independent terms'] ultimate shape
        # is.
        if ndims_params is None:
            # We assume the parameters are scalar.
            infer_params = ((None, None) + dist_params)
            ndim_ind, shape_ind, bcast = _infer_ndim_bcast(*infer_params)
        else:
            # We know the number of dimensions for each param (of one
            # variate), so we can "drop" those known dimensions (by creating
            # a dummy variable without them) and find the shape implied by
            # broadcasting the reduced dimension params.
            # XXX: The use of `tuple` on the shape object avoids issues in
            # `theano.tensor.alloc` with `theano.tensor.get_vector_length`
            # applied to the Subtensor that arises due to indexing.
            dummy_params = tuple(p if n == 0 else tt.zeros(tuple(p.shape)[:-n])
                                 for p, n in zip(dist_params,
                                                 cycle(ndims_params)))
            infer_params = ((None, None) + dummy_params)
            ndim_ind, shape_ind, bcast = _infer_ndim_bcast(*infer_params)

        if ndim_supp <= 0:
            shape_supp = tuple()
        else:
            # TODO: Would be good to check that other params match
            # in support dimension.
            ref_shape = tt.shape(dist_params[0])
            shape_supp = ref_shape[-ndim_supp:]
            # shape_ind = shape_ind[:-ndim_supp]
            # ndim_ind -= ndim_supp

        # We have to be careful with `as_tensor_variable`; it will produce
        # empty collections with dtype=floatX, which violates our
        # expectations for a shape object.
        # FIX: `np.alen` and the `np.int` alias were removed from NumPy; use
        # `np.size` and the builtin `int` (equivalent here).
        if size is None or np.size(size) == 0:
            shape_reps = np.array((), dtype=int)
        elif np.shape(size) == ():
            shape_reps = np.asarray((size,), dtype=int)
        else:
            shape_reps = np.asarray(size, dtype=int)

        # TODO: We could use the bcast info we've obtained thus far.
        # Add broadcast info from replication dimensions.
        # bcast = tuple(True if s_ == 1 else False
        #               for s_ in shape_reps) + bcast

        shape_reps = tt.as_tensor_variable(shape_reps, ndim=1)

        return cls(shape_supp, shape_ind, shape_reps, **kwargs)
def test_distribution_shape():
    """Exercise `DistributionShape.infer_shapes` across scalar and vector
    support, replications, broadcasting and explicit parameter dimensions.
    """
    def evaled_map(d):
        # Evaluate any symbolic members so they compare as concrete arrays.
        return {k: (v.eval() if hasattr(v, 'eval') else v)
                for k, v in d.items()}

    def check(params, expected, **kwargs):
        # Infer shapes for `params` and compare each member to `expected`.
        inferred = DistributionShape.infer_shapes(params, **kwargs)
        res = evaled_map(inferred.__dict__)
        np.testing.assert_array_equal(res['bcast'], expected['bcast'])
        np.testing.assert_array_equal(res['shape'], expected['shape'])
        np.testing.assert_array_equal(res['shape_ind'],
                                      expected['shape_ind'])
        np.testing.assert_array_equal(res['shape_reps'],
                                      expected['shape_reps'])
        np.testing.assert_array_equal(res['shape_supp'],
                                      expected['shape_supp'])

    # Scalar support only
    check((tt.as_tensor_variable(0),),
          dict(bcast=(), shape=(), shape_ind=(),
               shape_reps=(), shape_supp=()),
          ndim_supp=0)

    # Scalar support with independent variates
    check((tt.as_tensor_variable([[0, 1], [2, 3]]),),
          dict(bcast=(False, False), shape=(2, 2), shape_ind=(2, 2),
               shape_reps=(), shape_supp=()),
          ndim_supp=0)

    # Scalar support with independent and replicated variates
    check((tt.as_tensor_variable([[0, 1], [2, 3]]),),
          dict(bcast=[False] * 4, shape=(3, 2, 2, 2), shape_ind=(2, 2),
               shape_reps=(3, 2), shape_supp=()),
          ndim_supp=0, size=(3, 2))

    # Scalar support, independent and replicated variates, and
    # broadcasted parameters (checked in both parameter orders)
    two_params = (tt.as_tensor_variable([[0, 1], [2, 3]]),
                  tt.as_tensor_variable([0]))
    for params in (two_params, list(reversed(two_params))):
        check(params,
              dict(bcast=[False] * 4, shape=(3, 2, 2, 2), shape_ind=(2, 2),
                   shape_reps=(3, 2), shape_supp=()),
              ndim_supp=0, size=(3, 2))

    # Scalar support, independent and replicated variates, broadcasted
    # parameters, and specified param dimensions
    check((tt.as_tensor_variable([[0, 1], [2, 3]]),
           tt.as_tensor_variable([-1, -2])),
          dict(bcast=[False] * 2, shape=(3, 2), shape_ind=(),
               shape_reps=(3, 2), shape_supp=()),
          ndim_supp=0, ndims_params=(2, 1), size=(3, 2))

    # Independent variates specified by a non-scalar second parameter that
    # is declared scalar for a single variate; the first parameter is
    # declared (and is) 2D.
    check((tt.as_tensor_variable([[0, 1], [2, 3]]),
           tt.as_tensor_variable([-1, -2])),
          dict(bcast=[False] * 3, shape=(3, 2, 2), shape_ind=(2,),
               shape_reps=(3, 2), shape_supp=()),
          ndim_supp=0, ndims_params=(2, 0), size=(3, 2))

    # Vector support with replications and parameter broadcasting.  This
    # covers a MvNormal given a single vector mean and three covariance/tau
    # matrices (and replicated).
    check((tt.as_tensor_variable([[[0, 1], [2, 3]],
                                  [[4, 5], [6, 7]],
                                  [[8, 9], [10, 11]]]),
           tt.as_tensor_variable([-1, -2])),
          dict(bcast=[False] * 4, shape=(5, 4, 3, 2), shape_ind=(3,),
               shape_reps=(5, 4), shape_supp=(2,)),
          ndim_supp=1, ndims_params=(2, 1), size=(5, 4))
def has_const_inputs(nodes):
    r"""Check that `nodes` have only constant inputs for their Ops.

    Useful for justifying one-time evals.

    Parameters
    ----------
    nodes: a graph node/variable or an iterable of them
        The node(s) whose input graphs are checked.

    Returns
    -------
    bool
        `True` when every leaf reachable through `owner.inputs` is a
        `tt.Constant`, `False` otherwise.
    """
    # FIX: `collections.Iterable` was removed in Python 3.10; the ABCs now
    # live only in `collections.abc`.
    if not isinstance(nodes, collections.abc.Iterable):
        nodes = [nodes]

    for node in nodes:
        owner = getattr(node, 'owner', None)
        if owner is not None:
            # A computed node is constant only if everything feeding it is.
            if not has_const_inputs(owner.inputs):
                return False
        elif not isinstance(node, tt.Constant):
            # A leaf that is not a constant makes the whole graph
            # non-constant.
            return False

    return True
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment