thelogicalgrammar/model.py

## model.py
import numpy as np
import pandas as pd

import pymc as pm
import arviz as az
import pytensor as theano
import pytensor.tensor as tt
tprint = theano.printing.Print

import itertools as it

###### Simulate experiment functions

def normalize(arr, axis=0):
    return arr / arr.sum(axis, keepdims=True)


def define_objects(n_beings=4, n_actions=3, full_output=False):

    being = range(n_beings)
    actions = range(n_beings, n_beings+n_actions)
    signals = range(n_beings+n_actions)

    scenes = np.array([
        i
        for i in it.product(being, actions, being)
        # Note: agent and patient can't be the same
        if i[0] != i[2]
    ])

    language_interpret = np.array(list(it.permutations(signals)))
    word_orders = np.array(list(it.permutations(range(3))))

    languages = []
    # for each interpretation function
    for inter in language_interpret:
        sub = []
        # for each word order
        for order in word_orders:
            subsub = []
            # for each scene
            # scene has: (agent, action, patient)
            for scene in scenes:
                # signals for [agent, action, patient]
                subsub.append(inter[scene][order])
            sub.append(subsub)
        languages.append(sub)
    languages = np.array(languages)

    if full_output:
        return scenes, languages, language_interpret, np.array(word_orders)
    return scenes, languages


def generate_trials(lang, n, scenes, n_scenes_trial):
    index_scenes = np.array([
        np.random.choice(
            len(scenes),
            4,
            replace=False
        )
        for _ in range(n)
    ])

    true_scenes_index = np.random.choice(
        np.arange(n_scenes_trial),
        size=len(index_scenes)
    )
    true_scenes = index_scenes[
        np.arange(len(index_scenes)),
        true_scenes_index
    ]
    utterances = lang[true_scenes]

    return true_scenes, index_scenes, utterances

def simulate_full_experiment(n_trials, n_participants):

    scenes, languages, language_interpret, word_orders = define_objects(
        full_output=True
    )

    true_scenes = []
    indices_scenes_trials = []
    signals = []

    ### Train each participant on different lang
    indices_true_language = [
        (
            np.random.randint(low=0, high=len(languages)),
            np.random.randint(low=0, high=6)
        )
        for _ in range(n_participants)
    ]

    for i in range(n_participants):

        index_true_language = indices_true_language[i]

        trials = generate_trials(
            languages[index_true_language],
            n=n_trials,
            scenes=scenes,
            n_scenes_trial=4
        )

        t_scenes, i_scenes_trials, sign = trials

        true_scenes.append(t_scenes)
        indices_scenes_trials.append(i_scenes_trials)
        signals.append(sign)

    true_scenes = np.swapaxes(true_scenes, 0, 1)
    indices_scenes_trials = np.swapaxes(indices_scenes_trials, 0, 1)
    signals = np.swapaxes(signals, 0, 1)

    scenes_trials = scenes[indices_scenes_trials]
    true_scenes_trials = scenes[true_scenes]

    returndict = {
        'true_scenes': true_scenes,
        'indices_scenes_trials': indices_scenes_trials,
        'signals': signals,
        'scenes_trials': scenes_trials,
        'true_scenes_trials': true_scenes_trials,
        'indices_true_language': np.array(indices_true_language)
    }

    return returndict


####### Define likelihood

def theano_normalize(tensor, axis):
    return tensor / tensor.sum(axis, keepdims=True)


def update_weights_theano_multiple_participants(scene, signal,
                                                interpretation_weights_original,
                                                word_order_weights_original,
                                                word_orders, learning_weights):
    """
    This function takes a description of a single trial index of the experiment
    as well as the participant's current posterior over
    interpretations and word orders
    and returns the updated participants' posterior.

    NOTE: This function calculates everything for *all agents* for one trial
    So it might differ from functions above that are written for one participant.
    TODO: Is it possible to rewrite this to do all trials at once efficiently?
          Maybe some clever cumulative operation?

    Parameters
    ----------
    scene: array
        dims (participant, 3)
        Description of scene that the signal describes.
        A single scene is described by (agent, action, patient)
    signal: array
        dims (participant, 3)
        The three columns report the indices of the signals
    interpretation_weights_original: array
        dims: (participant, signal, meaning)
        shape: (# participants, 7, 7)
        Each row is a the prob of the signal referring to each
        meaning. Gets updated in this function in light
        of evidence given by scene/signal combos.
    word_order_weights_original: array
        dims: (participant, word order)
        shape: (# participants, 6)
    word_orders: array
        dims: (word order,3)
        shape: (6,3)
        Word orders (see functions above for description)
    learning_weights: array
        dims: (participant)
        The learning weight of each participant
    Returns
    -------
    tuple of arrays
        interpretation_weights: array
            Same shape as interpretation_weights_original
            NOTE: normalized before returning
        word_order_weights: array
            Same shape as word_order_weights_original
            NOTE: normalized before returning
    """

    n_parti = interpretation_weights_original.shape[0]

    ### Snippet A (update interpretation weights)

    # get one-hot vectors for the indices as described in the notebook
    # in which to add the calculated weights that should be added.
    # Dimensions (participant, object in scene, word order, signal)
    # shape: (# participants, 3, 6, 3)
    mask_orders = tt.eq(
        tt.tile(word_orders, (n_parti,3,1,1)),
        np.array([0,1,2])[None,:,None,None]
    )

    # Get the values from word_order_weights_original
    # to add to the various elements
    # using the one-hot vectors in mask_orders.
    # values_to_add: shape (# participants, 3, 3)
    # dims (participant, scene components, words in signal)
    # rows: (subj, obj, act)
    # cols: (first word, second word, third word)
    values_to_add = (
        # get the weights compatible with each word order
        (word_order_weights_original[:,None,:,None] * mask_orders)
        # sum over word order dimension
        # to get the probability that it's ANY of those orders
        .sum(2)
    )

    # indicates which participant
    arg1 = tt.tile(tt.arange(n_parti).dimshuffle(0,'x','x'), (1,3,3))
    # indicates which signals (rows)
    arg2 = tt.tile(signal.dimshuffle(0,'x',1), (1,3,1))
    # indicates which meanings (columns)
    arg3 = tt.tile(scene.dimshuffle(0, 1,'x'), 3)

    # insert value to right locations
    # keep 0 everywhere else
    interpretation_to_add = tt.set_subtensor(
        # create an array with shape (participant, signal, meaning)
        tt.zeros(interpretation_weights_original.shape)[
            arg1, arg2, arg3
        ],
        values_to_add
    )

    # Create array with new weights and then normalize
    # in the meaning dimension
    # (get a distribution over meanings given a signal
    # which makes sense since this is an interpretation matrix)
    # shape: participant, signal, meaning
    interpretation_weights = theano_normalize(
        # add to the original array
        interpretation_weights_original
        + learning_weights[:,None,None]*interpretation_to_add,
        axis=-1
    )

    # Snippet B (update word order weights)

    # weights to add to the word order
    word_order_to_add = interpretation_weights_original[
        tt.tile(
            tt.arange(n_parti).dimshuffle(0,'x','x'),
            (1,6,3)
        ),
        # for each participant,
        # repeat signal once for each word order.
        # dims (participant, word order, signal)
        tt.tile(
            signal.dimshuffle(0,'x',1),
            (1,6,1)
        ),
        scene[:,word_orders]
    ].prod(-1)

    word_order_weights = theano_normalize(
        word_order_weights_original
        + learning_weights[:,None]*word_order_to_add,
        1
    )

    return interpretation_weights, word_order_weights


def softmax_theano(x, axis):
    e_x = tt.exp(x - x.max(axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)


def probs_languages_to_probs_scenes_multiple_participants(
                    interpretation_weights, word_order_weights,
                    signals, scenes_trials, word_orders, softmax_alphas):
    """
    Finds probability of choosing each of the four scenes in `scenes`
    given the observed utterance and the current probabilities
    of word-meaning association and word orders

    Parameters
    ----------
    interpretation_weights: array
        Dimensions: (trial, participant, word, meaning)
        NOTE: Trial dimension was added in the scan
    word_order_weights: array
        Dimensions: (trial, participant, word order)
        NOTE: Trial dimension was added in the scan
    signals: array
        Dimensions: (trial, participant, sentence position)
    scenes_trials: array
        Dimensions: (trial, participant, scene, component)
    word_orders: array
        Dimensions: (word order, sentence position)
    """

    # meaning that each word would be expressing
    # assuming each word order is true
    # dims (trial, participant, scene in trial, word orders, 3), values: meanings
    # e.g., (40, 5, 4, 6, 3)
    meanings_trials_orders = scenes_trials[:,:,:,word_orders]

    # get the probability that the whole sentence
    # observed in each trial expresses
    # each of the scenes in the trial
    # assuming each word order in turn
    # Dimensions:
    # (trial, participant, scene in trial, word order)
    prob_sentence_by_trial_given_order = (
        interpretation_weights[
            tt.arange(interpretation_weights.shape[0])[:,None,None,None,None],
            tt.arange(interpretation_weights.shape[1])[None,:,None,None,None],
            signals[:,:,None,None,:],
            meanings_trials_orders
        ]
        # get joint probability of the observed words
        # (for each trial, word order and scene)
        .prod(-1)
    )

    # Marginalize out word order
    # to get probs of observed words in scene
    # Dimensions: (trial, participant, scene in trial)
    unnormalized_probs = (
        prob_sentence_by_trial_given_order
        # multiply by probability of word orders
        * word_order_weights.dimshuffle(0, 1, 'x', 2)
    ).sum(-1)

    if softmax_alphas is None:
        prodprobs = normalize(
            unnormalized_probs,
            2
        )

    else:
        prodprobs = softmax_theano(
            unnormalized_probs*softmax_alphas[:,None],
            -1
        )

    return prodprobs


def factory_weight_model_multiple_participants(true_scenes_trials, signals, word_orders,
                                               history_choices_indices, scenes_trials):
    """
    Parameters
    ----------
    history_choices_indices: None or array
        If none, the model is defined without observed
    """

    # with all these dimensions I've decided to be careful
    # and explicitly write down all the shapes
    n_trials, n_participants, n_scenes, _ = scenes_trials.shape
    coords = {
        "trial": np.arange(n_trials),
        "participant": np.arange(n_participants),
        "scene": np.arange(n_scenes),
        "component": ['agent', 'action', 'scene'],
        "word_order": np.arange(6),
        "sentence_position": np.arange(3)
    }

    with pm.Model(coords=coords) as model:

        ##### Create data

        true_scenes_trials_data = pm.Data(
            'true_scenes_trials',
            true_scenes_trials,
            dims=('trial', 'participant', 'component'),
            mutable=True
        )

        signals_data = pm.Data(
            'signals',
            signals,
            dims=('trial', 'participant', 'sentence_position'),
            mutable=True
        )

        word_orders_data = pm.Data(
            'word_orders',
            word_orders,
            dims=('word_order', 'sentence_position'),
            mutable=True
        )

        scenes_trials_data = pm.Data(
            'scenes_trials',
            scenes_trials,
            dims=('trial', 'participant', 'scene', 'component'),
            mutable=True
        )

        if history_choices_indices is None:
            history_choices_indices_data = None
        else:
            history_choices_indices_data = pm.Data(
                'history_choices_indices',
                history_choices_indices,
                dims=('trial', 'participant')
            )

        ####### Define priors

        # sample population-level hyperprior
        # over the individual parameters of the Dirichlet following this:
        # http://tdunning.blogspot.com/2010/04/sampling-dirichlet-distribution-revised.html
        gammas = pm.Gamma(
            'hyper_gammas',
            alpha=5,
            beta=2,
            dims=('word_order')
        )

        hyper_alpha = pm.Deterministic(
            'hyper_alpha',
            gammas.sum()
        )

        # shape (# participants, 6)
        # dim (participant, word order)
        word_order_weights = pm.Dirichlet(
            'word_order_prior',
            gammas,
            dims=('participant', 'word_order')
        )


        # shape (# participants)
        learning_weights = pm.Gamma(
            'learning_weights',
            alpha=8.,
            beta=15.,
            dims=('participant')
        )

        softmax_alphas = pm.Exponential(
            'softmax_alpha',
            lam=0.5,
            dims=('participant')
        )

        ####### likelihood part

        # The participants' marginal prior over
        # interpretation weights is fixed in advance
        # to be uniform, rather than estimated.
        # dims (participant, word, meaning)
        interpretation_weights = theano_normalize(
            tt.ones((n_participants,7,7)),
            axis=1
        )

        # scan is theano's way to do looping.
        # probs of each word-meaning connection
        # and each word order respectively
        (probs_interpretations, probs_word_orders), outputs_info = theano.scan(
            update_weights_theano_multiple_participants,
            sequences=[
                true_scenes_trials_data,
                signals_data
            ],
            outputs_info=[
                interpretation_weights,
                word_order_weights
            ],
            non_sequences=[
                word_orders_data,
                learning_weights
            ]
        )

        # dims (trial, participant, scenes in trial)
        # where there are always 4 scenes in the trial
        prob_scenes = probs_languages_to_probs_scenes_multiple_participants(
            probs_interpretations,
            probs_word_orders,
            signals_data,
            scenes_trials_data,
            word_orders_data,
            softmax_alphas
        )

        pm.Categorical(
            'chosen_scenes',
            p=prob_scenes,
            observed=history_choices_indices_data,
            dims=('trial', 'participant')
        )

    return model


def prior_predictive_sample(n_trials, n_participants):

    simulated_results = simulate_full_experiment(
        n_trials,
        n_participants
    )

    scenes, languages, language_interpret, word_orders = define_objects(
        full_output=True
    )

    model = factory_weight_model_multiple_participants(
        true_scenes_trials=simulated_results['true_scenes_trials'],
        signals=simulated_results['signals'],
        scenes_trials=simulated_results['scenes_trials'],
        word_orders=word_orders,
        history_choices_indices=None
    )

    with model:
        simulated_data = pm.sample_prior_predictive(samples=1)

    return simulated_results, simulated_data


if __name__=='__main__':

    # get some samples to run inference with
    simulated_results, data = prior_predictive_sample(
        n_trials=10,
        n_participants=10
    )

    analysis_arrays = data.constant_data
    observed = data.prior.chosen_scenes.values[0,0]

    _,_,_, word_orders = define_objects(
        full_output=True
    )

    model = factory_weight_model_multiple_participants(
        true_scenes_trials=analysis_arrays['true_scenes_trials'],
        signals=analysis_arrays['signals'],
        word_orders=word_orders,
        history_choices_indices=observed,
        scenes_trials=analysis_arrays['scenes_trials'],
    )

    with model:
        trace = pm.sample()
	import numpy as np
	import pandas as pd

	import pymc as pm
	import arviz as az
	import pytensor as theano
	import pytensor.tensor as tt
	tprint = theano.printing.Print

	import itertools as it

	###### Simulate experiment functions

	def normalize(arr, axis=0):
	return arr / arr.sum(axis, keepdims=True)


	def define_objects(n_beings=4, n_actions=3, full_output=False):

	being = range(n_beings)
	actions = range(n_beings, n_beings+n_actions)
	signals = range(n_beings+n_actions)

	scenes = np.array([
	i
	for i in it.product(being, actions, being)
	# Note: agent and patient can't be the same
	if i[0] != i[2]
	])

	language_interpret = np.array(list(it.permutations(signals)))
	word_orders = np.array(list(it.permutations(range(3))))

	languages = []
	# for each interpretation function
	for inter in language_interpret:
	sub = []
	# for each word order
	for order in word_orders:
	subsub = []
	# for each scene
	# scene has: (agent, action, patient)
	for scene in scenes:
	# signals for [agent, action, patient]
	subsub.append(inter[scene][order])
	sub.append(subsub)
	languages.append(sub)
	languages = np.array(languages)

	if full_output:
	return scenes, languages, language_interpret, np.array(word_orders)
	return scenes, languages


	def generate_trials(lang, n, scenes, n_scenes_trial):
	index_scenes = np.array([
	np.random.choice(
	len(scenes),
	4,
	replace=False
	)
	for _ in range(n)
	])

	true_scenes_index = np.random.choice(
	np.arange(n_scenes_trial),
	size=len(index_scenes)
	)
	true_scenes = index_scenes[
	np.arange(len(index_scenes)),
	true_scenes_index
	]
	utterances = lang[true_scenes]

	return true_scenes, index_scenes, utterances

	def simulate_full_experiment(n_trials, n_participants):

	scenes, languages, language_interpret, word_orders = define_objects(
	full_output=True
	)

	true_scenes = []
	indices_scenes_trials = []
	signals = []

	### Train each participant on different lang
	indices_true_language = [
	(
	np.random.randint(low=0, high=len(languages)),
	np.random.randint(low=0, high=6)
	)
	for _ in range(n_participants)
	]

	for i in range(n_participants):

	index_true_language = indices_true_language[i]

	trials = generate_trials(
	languages[index_true_language],
	n=n_trials,
	scenes=scenes,
	n_scenes_trial=4
	)

	t_scenes, i_scenes_trials, sign = trials

	true_scenes.append(t_scenes)
	indices_scenes_trials.append(i_scenes_trials)
	signals.append(sign)

	true_scenes = np.swapaxes(true_scenes, 0, 1)
	indices_scenes_trials = np.swapaxes(indices_scenes_trials, 0, 1)
	signals = np.swapaxes(signals, 0, 1)

	scenes_trials = scenes[indices_scenes_trials]
	true_scenes_trials = scenes[true_scenes]

	returndict = {
	'true_scenes': true_scenes,
	'indices_scenes_trials': indices_scenes_trials,
	'signals': signals,
	'scenes_trials': scenes_trials,
	'true_scenes_trials': true_scenes_trials,
	'indices_true_language': np.array(indices_true_language)
	}

	return returndict


	####### Define likelihood

	def theano_normalize(tensor, axis):
	return tensor / tensor.sum(axis, keepdims=True)


	def update_weights_theano_multiple_participants(scene, signal,
	interpretation_weights_original,
	word_order_weights_original,
	word_orders, learning_weights):
	"""
	This function takes a description of a single trial index of the experiment
	as well as the participant's current posterior over
	interpretations and word orders
	and returns the updated participants' posterior.

	NOTE: This function calculates everything for all agents for one trial
	So it might differ from functions above that are written for one participant.
	TODO: Is it possible to rewrite this to do all trials at once efficiently?
	Maybe some clever cumulative operation?

	Parameters
	----------
	scene: array
	dims (participant, 3)
	Description of scene that the signal describes.
	A single scene is described by (agent, action, patient)
	signal: array
	dims (participant, 3)
	The three columns report the indices of the signals
	interpretation_weights_original: array
	dims: (participant, signal, meaning)
	shape: (# participants, 7, 7)
	Each row is a the prob of the signal referring to each
	meaning. Gets updated in this function in light
	of evidence given by scene/signal combos.
	word_order_weights_original: array
	dims: (participant, word order)
	shape: (# participants, 6)
	word_orders: array
	dims: (word order,3)
	shape: (6,3)
	Word orders (see functions above for description)
	learning_weights: array
	dims: (participant)
	The learning weight of each participant
	Returns
	-------
	tuple of arrays
	interpretation_weights: array
	Same shape as interpretation_weights_original
	NOTE: normalized before returning
	word_order_weights: array
	Same shape as word_order_weights_original
	NOTE: normalized before returning
	"""

	n_parti = interpretation_weights_original.shape[0]

	### Snippet A (update interpretation weights)

	# get one-hot vectors for the indices as described in the notebook
	# in which to add the calculated weights that should be added.
	# Dimensions (participant, object in scene, word order, signal)
	# shape: (# participants, 3, 6, 3)
	mask_orders = tt.eq(
	tt.tile(word_orders, (n_parti,3,1,1)),
	np.array([0,1,2])[None,:,None,None]
	)

	# Get the values from word_order_weights_original
	# to add to the various elements
	# using the one-hot vectors in mask_orders.
	# values_to_add: shape (# participants, 3, 3)
	# dims (participant, scene components, words in signal)
	# rows: (subj, obj, act)
	# cols: (first word, second word, third word)
	values_to_add = (
	# get the weights compatible with each word order
	(word_order_weights_original[:,None,:,None] * mask_orders)
	# sum over word order dimension
	# to get the probability that it's ANY of those orders
	.sum(2)
	)

	# indicates which participant
	arg1 = tt.tile(tt.arange(n_parti).dimshuffle(0,'x','x'), (1,3,3))
	# indicates which signals (rows)
	arg2 = tt.tile(signal.dimshuffle(0,'x',1), (1,3,1))
	# indicates which meanings (columns)
	arg3 = tt.tile(scene.dimshuffle(0, 1,'x'), 3)

	# insert value to right locations
	# keep 0 everywhere else
	interpretation_to_add = tt.set_subtensor(
	# create an array with shape (participant, signal, meaning)
	tt.zeros(interpretation_weights_original.shape)[
	arg1, arg2, arg3
	],
	values_to_add
	)

	# Create array with new weights and then normalize
	# in the meaning dimension
	# (get a distribution over meanings given a signal
	# which makes sense since this is an interpretation matrix)
	# shape: participant, signal, meaning
	interpretation_weights = theano_normalize(
	# add to the original array
	interpretation_weights_original
	+ learning_weights[:,None,None]*interpretation_to_add,
	axis=-1
	)

	# Snippet B (update word order weights)

	# weights to add to the word order
	word_order_to_add = interpretation_weights_original[
	tt.tile(
	tt.arange(n_parti).dimshuffle(0,'x','x'),
	(1,6,3)
	),
	# for each participant,
	# repeat signal once for each word order.
	# dims (participant, word order, signal)
	tt.tile(
	signal.dimshuffle(0,'x',1),
	(1,6,1)
	),
	scene[:,word_orders]
	].prod(-1)

	word_order_weights = theano_normalize(
	word_order_weights_original
	+ learning_weights[:,None]*word_order_to_add,
	1
	)

	return interpretation_weights, word_order_weights


	def softmax_theano(x, axis):
	e_x = tt.exp(x - x.max(axis=axis, keepdims=True))
	return e_x / e_x.sum(axis=axis, keepdims=True)


	def probs_languages_to_probs_scenes_multiple_participants(
	interpretation_weights, word_order_weights,
	signals, scenes_trials, word_orders, softmax_alphas):
	"""
	Finds probability of choosing each of the four scenes in `scenes`
	given the observed utterance and the current probabilities
	of word-meaning association and word orders

	Parameters
	----------
	interpretation_weights: array
	Dimensions: (trial, participant, word, meaning)
	NOTE: Trial dimension was added in the scan
	word_order_weights: array
	Dimensions: (trial, participant, word order)
	NOTE: Trial dimension was added in the scan
	signals: array
	Dimensions: (trial, participant, sentence position)
	scenes_trials: array
	Dimensions: (trial, participant, scene, component)
	word_orders: array
	Dimensions: (word order, sentence position)
	"""

	# meaning that each word would be expressing
	# assuming each word order is true
	# dims (trial, participant, scene in trial, word orders, 3), values: meanings
	# e.g., (40, 5, 4, 6, 3)
	meanings_trials_orders = scenes_trials[:,:,:,word_orders]

	# get the probability that the whole sentence
	# observed in each trial expresses
	# each of the scenes in the trial
	# assuming each word order in turn
	# Dimensions:
	# (trial, participant, scene in trial, word order)
	prob_sentence_by_trial_given_order = (
	interpretation_weights[
	tt.arange(interpretation_weights.shape[0])[:,None,None,None,None],
	tt.arange(interpretation_weights.shape[1])[None,:,None,None,None],
	signals[:,:,None,None,:],
	meanings_trials_orders
	]
	# get joint probability of the observed words
	# (for each trial, word order and scene)
	.prod(-1)
	)

	# Marginalize out word order
	# to get probs of observed words in scene
	# Dimensions: (trial, participant, scene in trial)
	unnormalized_probs = (
	prob_sentence_by_trial_given_order
	# multiply by probability of word orders
	* word_order_weights.dimshuffle(0, 1, 'x', 2)
	).sum(-1)

	if softmax_alphas is None:
	prodprobs = normalize(
	unnormalized_probs,
	2
	)

	else:
	prodprobs = softmax_theano(
	unnormalized_probs*softmax_alphas[:,None],
	-1
	)

	return prodprobs


	def factory_weight_model_multiple_participants(true_scenes_trials, signals, word_orders,
	history_choices_indices, scenes_trials):
	"""
	Parameters
	----------
	history_choices_indices: None or array
	If none, the model is defined without observed
	"""

	# with all these dimensions I've decided to be careful
	# and explicitly write down all the shapes
	n_trials, n_participants, n_scenes, _ = scenes_trials.shape
	coords = {
	"trial": np.arange(n_trials),
	"participant": np.arange(n_participants),
	"scene": np.arange(n_scenes),
	"component": ['agent', 'action', 'scene'],
	"word_order": np.arange(6),
	"sentence_position": np.arange(3)
	}

	with pm.Model(coords=coords) as model:

	##### Create data

	true_scenes_trials_data = pm.Data(
	'true_scenes_trials',
	true_scenes_trials,
	dims=('trial', 'participant', 'component'),
	mutable=True
	)

	signals_data = pm.Data(
	'signals',
	signals,
	dims=('trial', 'participant', 'sentence_position'),
	mutable=True
	)

	word_orders_data = pm.Data(
	'word_orders',
	word_orders,
	dims=('word_order', 'sentence_position'),
	mutable=True
	)

	scenes_trials_data = pm.Data(
	'scenes_trials',
	scenes_trials,
	dims=('trial', 'participant', 'scene', 'component'),
	mutable=True
	)

	if history_choices_indices is None:
	history_choices_indices_data = None
	else:
	history_choices_indices_data = pm.Data(
	'history_choices_indices',
	history_choices_indices,
	dims=('trial', 'participant')
	)

	####### Define priors

	# sample population-level hyperprior
	# over the individual parameters of the Dirichlet following this:
	# http://tdunning.blogspot.com/2010/04/sampling-dirichlet-distribution-revised.html
	gammas = pm.Gamma(
	'hyper_gammas',
	alpha=5,
	beta=2,
	dims=('word_order')
	)

	hyper_alpha = pm.Deterministic(
	'hyper_alpha',
	gammas.sum()
	)

	# shape (# participants, 6)
	# dim (participant, word order)
	word_order_weights = pm.Dirichlet(
	'word_order_prior',
	gammas,
	dims=('participant', 'word_order')
	)


	# shape (# participants)
	learning_weights = pm.Gamma(
	'learning_weights',
	alpha=8.,
	beta=15.,
	dims=('participant')
	)

	softmax_alphas = pm.Exponential(
	'softmax_alpha',
	lam=0.5,
	dims=('participant')
	)

	####### likelihood part

	# The participants' marginal prior over
	# interpretation weights is fixed in advance
	# to be uniform, rather than estimated.
	# dims (participant, word, meaning)
	interpretation_weights = theano_normalize(
	tt.ones((n_participants,7,7)),
	axis=1
	)

	# scan is theano's way to do looping.
	# probs of each word-meaning connection
	# and each word order respectively
	(probs_interpretations, probs_word_orders), outputs_info = theano.scan(
	update_weights_theano_multiple_participants,
	sequences=[
	true_scenes_trials_data,
	signals_data
	],
	outputs_info=[
	interpretation_weights,
	word_order_weights
	],
	non_sequences=[
	word_orders_data,
	learning_weights
	]
	)

	# dims (trial, participant, scenes in trial)
	# where there are always 4 scenes in the trial
	prob_scenes = probs_languages_to_probs_scenes_multiple_participants(
	probs_interpretations,
	probs_word_orders,
	signals_data,
	scenes_trials_data,
	word_orders_data,
	softmax_alphas
	)

	pm.Categorical(
	'chosen_scenes',
	p=prob_scenes,
	observed=history_choices_indices_data,
	dims=('trial', 'participant')
	)

	return model


	def prior_predictive_sample(n_trials, n_participants):

	simulated_results = simulate_full_experiment(
	n_trials,
	n_participants
	)

	scenes, languages, language_interpret, word_orders = define_objects(
	full_output=True
	)

	model = factory_weight_model_multiple_participants(
	true_scenes_trials=simulated_results['true_scenes_trials'],
	signals=simulated_results['signals'],
	scenes_trials=simulated_results['scenes_trials'],
	word_orders=word_orders,
	history_choices_indices=None
	)

	with model:
	simulated_data = pm.sample_prior_predictive(samples=1)

	return simulated_results, simulated_data


	if __name__=='__main__':

	# get some samples to run inference with
	simulated_results, data = prior_predictive_sample(
	n_trials=10,
	n_participants=10
	)

	analysis_arrays = data.constant_data
	observed = data.prior.chosen_scenes.values[0,0]

	_,_,_, word_orders = define_objects(
	full_output=True
	)

	model = factory_weight_model_multiple_participants(
	true_scenes_trials=analysis_arrays['true_scenes_trials'],
	signals=analysis_arrays['signals'],
	word_orders=word_orders,
	history_choices_indices=observed,
	scenes_trials=analysis_arrays['scenes_trials'],
	)

	with model:
	trace = pm.sample()