Created
February 1, 2023 08:38
-
-
Save thelogicalgrammar/fed5f4750d3e51d5906f06099dba889f to your computer and use it in GitHub Desktop.
Model with complex likelihood function
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import pymc as pm | |
import arviz as az | |
import pytensor as theano | |
import pytensor.tensor as tt | |
tprint = theano.printing.Print | |
import itertools as it | |
###### Simulate experiment functions | |
def normalize(arr, axis=0): | |
return arr / arr.sum(axis, keepdims=True) | |
def define_objects(n_beings=4, n_actions=3, full_output=False): | |
being = range(n_beings) | |
actions = range(n_beings, n_beings+n_actions) | |
signals = range(n_beings+n_actions) | |
scenes = np.array([ | |
i | |
for i in it.product(being, actions, being) | |
# Note: agent and patient can't be the same | |
if i[0] != i[2] | |
]) | |
language_interpret = np.array(list(it.permutations(signals))) | |
word_orders = np.array(list(it.permutations(range(3)))) | |
languages = [] | |
# for each interpretation function | |
for inter in language_interpret: | |
sub = [] | |
# for each word order | |
for order in word_orders: | |
subsub = [] | |
# for each scene | |
# scene has: (agent, action, patient) | |
for scene in scenes: | |
# signals for [agent, action, patient] | |
subsub.append(inter[scene][order]) | |
sub.append(subsub) | |
languages.append(sub) | |
languages = np.array(languages) | |
if full_output: | |
return scenes, languages, language_interpret, np.array(word_orders) | |
return scenes, languages | |
def generate_trials(lang, n, scenes, n_scenes_trial): | |
index_scenes = np.array([ | |
np.random.choice( | |
len(scenes), | |
4, | |
replace=False | |
) | |
for _ in range(n) | |
]) | |
true_scenes_index = np.random.choice( | |
np.arange(n_scenes_trial), | |
size=len(index_scenes) | |
) | |
true_scenes = index_scenes[ | |
np.arange(len(index_scenes)), | |
true_scenes_index | |
] | |
utterances = lang[true_scenes] | |
return true_scenes, index_scenes, utterances | |
def simulate_full_experiment(n_trials, n_participants): | |
scenes, languages, language_interpret, word_orders = define_objects( | |
full_output=True | |
) | |
true_scenes = [] | |
indices_scenes_trials = [] | |
signals = [] | |
### Train each participant on different lang | |
indices_true_language = [ | |
( | |
np.random.randint(low=0, high=len(languages)), | |
np.random.randint(low=0, high=6) | |
) | |
for _ in range(n_participants) | |
] | |
for i in range(n_participants): | |
index_true_language = indices_true_language[i] | |
trials = generate_trials( | |
languages[index_true_language], | |
n=n_trials, | |
scenes=scenes, | |
n_scenes_trial=4 | |
) | |
t_scenes, i_scenes_trials, sign = trials | |
true_scenes.append(t_scenes) | |
indices_scenes_trials.append(i_scenes_trials) | |
signals.append(sign) | |
true_scenes = np.swapaxes(true_scenes, 0, 1) | |
indices_scenes_trials = np.swapaxes(indices_scenes_trials, 0, 1) | |
signals = np.swapaxes(signals, 0, 1) | |
scenes_trials = scenes[indices_scenes_trials] | |
true_scenes_trials = scenes[true_scenes] | |
returndict = { | |
'true_scenes': true_scenes, | |
'indices_scenes_trials': indices_scenes_trials, | |
'signals': signals, | |
'scenes_trials': scenes_trials, | |
'true_scenes_trials': true_scenes_trials, | |
'indices_true_language': np.array(indices_true_language) | |
} | |
return returndict | |
####### Define likelihood | |
def theano_normalize(tensor, axis): | |
return tensor / tensor.sum(axis, keepdims=True) | |
def update_weights_theano_multiple_participants(scene, signal, | |
interpretation_weights_original, | |
word_order_weights_original, | |
word_orders, learning_weights): | |
""" | |
This function takes a description of a single trial index of the experiment | |
as well as the participant's current posterior over | |
interpretations and word orders | |
and returns the updated participants' posterior. | |
NOTE: This function calculates everything for *all agents* for one trial | |
So it might differ from functions above that are written for one participant. | |
TODO: Is it possible to rewrite this to do all trials at once efficiently? | |
Maybe some clever cumulative operation? | |
Parameters | |
---------- | |
scene: array | |
dims (participant, 3) | |
Description of scene that the signal describes. | |
A single scene is described by (agent, action, patient) | |
signal: array | |
dims (participant, 3) | |
The three columns report the indices of the signals | |
interpretation_weights_original: array | |
dims: (participant, signal, meaning) | |
shape: (# participants, 7, 7) | |
Each row is a the prob of the signal referring to each | |
meaning. Gets updated in this function in light | |
of evidence given by scene/signal combos. | |
word_order_weights_original: array | |
dims: (participant, word order) | |
shape: (# participants, 6) | |
word_orders: array | |
dims: (word order,3) | |
shape: (6,3) | |
Word orders (see functions above for description) | |
learning_weights: array | |
dims: (participant) | |
The learning weight of each participant | |
Returns | |
------- | |
tuple of arrays | |
interpretation_weights: array | |
Same shape as interpretation_weights_original | |
NOTE: normalized before returning | |
word_order_weights: array | |
Same shape as word_order_weights_original | |
NOTE: normalized before returning | |
""" | |
n_parti = interpretation_weights_original.shape[0] | |
### Snippet A (update interpretation weights) | |
# get one-hot vectors for the indices as described in the notebook | |
# in which to add the calculated weights that should be added. | |
# Dimensions (participant, object in scene, word order, signal) | |
# shape: (# participants, 3, 6, 3) | |
mask_orders = tt.eq( | |
tt.tile(word_orders, (n_parti,3,1,1)), | |
np.array([0,1,2])[None,:,None,None] | |
) | |
# Get the values from word_order_weights_original | |
# to add to the various elements | |
# using the one-hot vectors in mask_orders. | |
# values_to_add: shape (# participants, 3, 3) | |
# dims (participant, scene components, words in signal) | |
# rows: (subj, obj, act) | |
# cols: (first word, second word, third word) | |
values_to_add = ( | |
# get the weights compatible with each word order | |
(word_order_weights_original[:,None,:,None] * mask_orders) | |
# sum over word order dimension | |
# to get the probability that it's ANY of those orders | |
.sum(2) | |
) | |
# indicates which participant | |
arg1 = tt.tile(tt.arange(n_parti).dimshuffle(0,'x','x'), (1,3,3)) | |
# indicates which signals (rows) | |
arg2 = tt.tile(signal.dimshuffle(0,'x',1), (1,3,1)) | |
# indicates which meanings (columns) | |
arg3 = tt.tile(scene.dimshuffle(0, 1,'x'), 3) | |
# insert value to right locations | |
# keep 0 everywhere else | |
interpretation_to_add = tt.set_subtensor( | |
# create an array with shape (participant, signal, meaning) | |
tt.zeros(interpretation_weights_original.shape)[ | |
arg1, arg2, arg3 | |
], | |
values_to_add | |
) | |
# Create array with new weights and then normalize | |
# in the meaning dimension | |
# (get a distribution over meanings given a signal | |
# which makes sense since this is an interpretation matrix) | |
# shape: participant, signal, meaning | |
interpretation_weights = theano_normalize( | |
# add to the original array | |
interpretation_weights_original | |
+ learning_weights[:,None,None]*interpretation_to_add, | |
axis=-1 | |
) | |
# Snippet B (update word order weights) | |
# weights to add to the word order | |
word_order_to_add = interpretation_weights_original[ | |
tt.tile( | |
tt.arange(n_parti).dimshuffle(0,'x','x'), | |
(1,6,3) | |
), | |
# for each participant, | |
# repeat signal once for each word order. | |
# dims (participant, word order, signal) | |
tt.tile( | |
signal.dimshuffle(0,'x',1), | |
(1,6,1) | |
), | |
scene[:,word_orders] | |
].prod(-1) | |
word_order_weights = theano_normalize( | |
word_order_weights_original | |
+ learning_weights[:,None]*word_order_to_add, | |
1 | |
) | |
return interpretation_weights, word_order_weights | |
def softmax_theano(x, axis): | |
e_x = tt.exp(x - x.max(axis=axis, keepdims=True)) | |
return e_x / e_x.sum(axis=axis, keepdims=True) | |
def probs_languages_to_probs_scenes_multiple_participants( | |
interpretation_weights, word_order_weights, | |
signals, scenes_trials, word_orders, softmax_alphas): | |
""" | |
Finds probability of choosing each of the four scenes in `scenes` | |
given the observed utterance and the current probabilities | |
of word-meaning association and word orders | |
Parameters | |
---------- | |
interpretation_weights: array | |
Dimensions: (trial, participant, word, meaning) | |
NOTE: Trial dimension was added in the scan | |
word_order_weights: array | |
Dimensions: (trial, participant, word order) | |
NOTE: Trial dimension was added in the scan | |
signals: array | |
Dimensions: (trial, participant, sentence position) | |
scenes_trials: array | |
Dimensions: (trial, participant, scene, component) | |
word_orders: array | |
Dimensions: (word order, sentence position) | |
""" | |
# meaning that each word would be expressing | |
# assuming each word order is true | |
# dims (trial, participant, scene in trial, word orders, 3), values: meanings | |
# e.g., (40, 5, 4, 6, 3) | |
meanings_trials_orders = scenes_trials[:,:,:,word_orders] | |
# get the probability that the whole sentence | |
# observed in each trial expresses | |
# each of the scenes in the trial | |
# assuming each word order in turn | |
# Dimensions: | |
# (trial, participant, scene in trial, word order) | |
prob_sentence_by_trial_given_order = ( | |
interpretation_weights[ | |
tt.arange(interpretation_weights.shape[0])[:,None,None,None,None], | |
tt.arange(interpretation_weights.shape[1])[None,:,None,None,None], | |
signals[:,:,None,None,:], | |
meanings_trials_orders | |
] | |
# get joint probability of the observed words | |
# (for each trial, word order and scene) | |
.prod(-1) | |
) | |
# Marginalize out word order | |
# to get probs of observed words in scene | |
# Dimensions: (trial, participant, scene in trial) | |
unnormalized_probs = ( | |
prob_sentence_by_trial_given_order | |
# multiply by probability of word orders | |
* word_order_weights.dimshuffle(0, 1, 'x', 2) | |
).sum(-1) | |
if softmax_alphas is None: | |
prodprobs = normalize( | |
unnormalized_probs, | |
2 | |
) | |
else: | |
prodprobs = softmax_theano( | |
unnormalized_probs*softmax_alphas[:,None], | |
-1 | |
) | |
return prodprobs | |
def factory_weight_model_multiple_participants(true_scenes_trials, signals, word_orders, | |
history_choices_indices, scenes_trials): | |
""" | |
Parameters | |
---------- | |
history_choices_indices: None or array | |
If none, the model is defined without observed | |
""" | |
# with all these dimensions I've decided to be careful | |
# and explicitly write down all the shapes | |
n_trials, n_participants, n_scenes, _ = scenes_trials.shape | |
coords = { | |
"trial": np.arange(n_trials), | |
"participant": np.arange(n_participants), | |
"scene": np.arange(n_scenes), | |
"component": ['agent', 'action', 'scene'], | |
"word_order": np.arange(6), | |
"sentence_position": np.arange(3) | |
} | |
with pm.Model(coords=coords) as model: | |
##### Create data | |
true_scenes_trials_data = pm.Data( | |
'true_scenes_trials', | |
true_scenes_trials, | |
dims=('trial', 'participant', 'component'), | |
mutable=True | |
) | |
signals_data = pm.Data( | |
'signals', | |
signals, | |
dims=('trial', 'participant', 'sentence_position'), | |
mutable=True | |
) | |
word_orders_data = pm.Data( | |
'word_orders', | |
word_orders, | |
dims=('word_order', 'sentence_position'), | |
mutable=True | |
) | |
scenes_trials_data = pm.Data( | |
'scenes_trials', | |
scenes_trials, | |
dims=('trial', 'participant', 'scene', 'component'), | |
mutable=True | |
) | |
if history_choices_indices is None: | |
history_choices_indices_data = None | |
else: | |
history_choices_indices_data = pm.Data( | |
'history_choices_indices', | |
history_choices_indices, | |
dims=('trial', 'participant') | |
) | |
####### Define priors | |
# sample population-level hyperprior | |
# over the individual parameters of the Dirichlet following this: | |
# http://tdunning.blogspot.com/2010/04/sampling-dirichlet-distribution-revised.html | |
gammas = pm.Gamma( | |
'hyper_gammas', | |
alpha=5, | |
beta=2, | |
dims=('word_order') | |
) | |
hyper_alpha = pm.Deterministic( | |
'hyper_alpha', | |
gammas.sum() | |
) | |
# shape (# participants, 6) | |
# dim (participant, word order) | |
word_order_weights = pm.Dirichlet( | |
'word_order_prior', | |
gammas, | |
dims=('participant', 'word_order') | |
) | |
# shape (# participants) | |
learning_weights = pm.Gamma( | |
'learning_weights', | |
alpha=8., | |
beta=15., | |
dims=('participant') | |
) | |
softmax_alphas = pm.Exponential( | |
'softmax_alpha', | |
lam=0.5, | |
dims=('participant') | |
) | |
####### likelihood part | |
# The participants' marginal prior over | |
# interpretation weights is fixed in advance | |
# to be uniform, rather than estimated. | |
# dims (participant, word, meaning) | |
interpretation_weights = theano_normalize( | |
tt.ones((n_participants,7,7)), | |
axis=1 | |
) | |
# scan is theano's way to do looping. | |
# probs of each word-meaning connection | |
# and each word order respectively | |
(probs_interpretations, probs_word_orders), outputs_info = theano.scan( | |
update_weights_theano_multiple_participants, | |
sequences=[ | |
true_scenes_trials_data, | |
signals_data | |
], | |
outputs_info=[ | |
interpretation_weights, | |
word_order_weights | |
], | |
non_sequences=[ | |
word_orders_data, | |
learning_weights | |
] | |
) | |
# dims (trial, participant, scenes in trial) | |
# where there are always 4 scenes in the trial | |
prob_scenes = probs_languages_to_probs_scenes_multiple_participants( | |
probs_interpretations, | |
probs_word_orders, | |
signals_data, | |
scenes_trials_data, | |
word_orders_data, | |
softmax_alphas | |
) | |
pm.Categorical( | |
'chosen_scenes', | |
p=prob_scenes, | |
observed=history_choices_indices_data, | |
dims=('trial', 'participant') | |
) | |
return model | |
def prior_predictive_sample(n_trials, n_participants): | |
simulated_results = simulate_full_experiment( | |
n_trials, | |
n_participants | |
) | |
scenes, languages, language_interpret, word_orders = define_objects( | |
full_output=True | |
) | |
model = factory_weight_model_multiple_participants( | |
true_scenes_trials=simulated_results['true_scenes_trials'], | |
signals=simulated_results['signals'], | |
scenes_trials=simulated_results['scenes_trials'], | |
word_orders=word_orders, | |
history_choices_indices=None | |
) | |
with model: | |
simulated_data = pm.sample_prior_predictive(samples=1) | |
return simulated_results, simulated_data | |
if __name__=='__main__': | |
# get some samples to run inference with | |
simulated_results, data = prior_predictive_sample( | |
n_trials=10, | |
n_participants=10 | |
) | |
analysis_arrays = data.constant_data | |
observed = data.prior.chosen_scenes.values[0,0] | |
_,_,_, word_orders = define_objects( | |
full_output=True | |
) | |
model = factory_weight_model_multiple_participants( | |
true_scenes_trials=analysis_arrays['true_scenes_trials'], | |
signals=analysis_arrays['signals'], | |
word_orders=word_orders, | |
history_choices_indices=observed, | |
scenes_trials=analysis_arrays['scenes_trials'], | |
) | |
with model: | |
trace = pm.sample() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment