Model with complex likelihood function
import numpy as np
import pandas as pd
import pymc as pm
import arviz as az
# pytensor is the maintained successor of theano used by PyMC >= 5;
# it is aliased to `theano` here so the scan/printing calls below
# keep their familiar names
import pytensor as theano
import pytensor.tensor as tt
tprint = theano.printing.Print
import itertools as it


###### Simulate experiment functions

def normalize(arr, axis=0):
    return arr / arr.sum(axis, keepdims=True)
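# e.g., normalize(np.ones((2, 3)), axis=1) returns an array whose rows
# each sum to 1 (a quick illustrative check, not part of the model)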
def define_objects(n_beings=4, n_actions=3, full_output=False):
    """
    Enumerate the scenes, the possible languages
    (interpretation functions x word orders), and optionally
    the interpretation functions and word orders themselves.
    """
    being = range(n_beings)
    actions = range(n_beings, n_beings+n_actions)
    signals = range(n_beings+n_actions)
    scenes = np.array([
        i
        for i in it.product(being, actions, being)
        # Note: agent and patient can't be the same
        if i[0] != i[2]
    ])
    language_interpret = np.array(list(it.permutations(signals)))
    word_orders = np.array(list(it.permutations(range(3))))
    languages = []
    # for each interpretation function
    for inter in language_interpret:
        sub = []
        # for each word order
        for order in word_orders:
            subsub = []
            # for each scene
            # scene has: (agent, action, patient)
            for scene in scenes:
                # signals for [agent, action, patient]
                subsub.append(inter[scene][order])
            sub.append(subsub)
        languages.append(sub)
    languages = np.array(languages)
    if full_output:
        return scenes, languages, language_interpret, np.array(word_orders)
    return scenes, languages
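# Illustrative shape check under the default arguments: with 4 beings and
# 3 actions there are 4*3*3 = 36 scenes (agent != patient), 7! = 5040
# interpretation functions, and 3! = 6 word orders, so:
# scenes, languages = define_objects()
# scenes.shape     -> (36, 3)
# languages.shape  -> (5040, 6, 36, 3)   # (interpretation, order, scene, word)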
def generate_trials(lang, n, scenes, n_scenes_trial):
    # sample n_scenes_trial distinct candidate scenes for each trial
    index_scenes = np.array([
        np.random.choice(
            len(scenes),
            n_scenes_trial,
            replace=False
        )
        for _ in range(n)
    ])
    # pick which of the candidate scenes is the one actually described
    true_scenes_index = np.random.choice(
        np.arange(n_scenes_trial),
        size=len(index_scenes)
    )
    true_scenes = index_scenes[
        np.arange(len(index_scenes)),
        true_scenes_index
    ]
    utterances = lang[true_scenes]
    return true_scenes, index_scenes, utterances
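# Illustrative return shapes for generate_trials(lang, n=10, scenes=scenes,
# n_scenes_trial=4), where lang has shape (36, 3):
# true_scenes  -> (10,)     index into scenes of the described scene
# index_scenes -> (10, 4)   indices of the candidate scenes per trial
# utterances   -> (10, 3)   the 3-word signal for each true scene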
def simulate_full_experiment(n_trials, n_participants):
    scenes, languages, language_interpret, word_orders = define_objects(
        full_output=True
    )
    true_scenes = []
    indices_scenes_trials = []
    signals = []
    ### Train each participant on a different lang
    indices_true_language = [
        (
            np.random.randint(low=0, high=len(languages)),
            np.random.randint(low=0, high=6)
        )
        for _ in range(n_participants)
    ]
    for i in range(n_participants):
        index_true_language = indices_true_language[i]
        trials = generate_trials(
            languages[index_true_language],
            n=n_trials,
            scenes=scenes,
            n_scenes_trial=4
        )
        t_scenes, i_scenes_trials, sign = trials
        true_scenes.append(t_scenes)
        indices_scenes_trials.append(i_scenes_trials)
        signals.append(sign)
    # move the trial dimension first: (trial, participant, ...)
    true_scenes = np.swapaxes(true_scenes, 0, 1)
    indices_scenes_trials = np.swapaxes(indices_scenes_trials, 0, 1)
    signals = np.swapaxes(signals, 0, 1)
    scenes_trials = scenes[indices_scenes_trials]
    true_scenes_trials = scenes[true_scenes]
    returndict = {
        'true_scenes': true_scenes,
        'indices_scenes_trials': indices_scenes_trials,
        'signals': signals,
        'scenes_trials': scenes_trials,
        'true_scenes_trials': true_scenes_trials,
        'indices_true_language': np.array(indices_true_language)
    }
    return returndict
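# Illustrative shapes of the returned arrays, for n_trials=T, n_participants=P:
# true_scenes           -> (T, P)        index of the described scene
# indices_scenes_trials -> (T, P, 4)     candidate-scene indices per trial
# signals               -> (T, P, 3)     observed 3-word utterances
# scenes_trials         -> (T, P, 4, 3)  candidate scenes as (agent, action, patient)
# true_scenes_trials    -> (T, P, 3)     the described scene itself
# indices_true_language -> (P, 2)        (interpretation index, word-order index)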
####### Define likelihood

def theano_normalize(tensor, axis):
    return tensor / tensor.sum(axis, keepdims=True)
def update_weights_theano_multiple_participants(scene, signal,
                                                interpretation_weights_original,
                                                word_order_weights_original,
                                                word_orders, learning_weights):
    """
    This function takes a description of a single trial of the experiment,
    as well as the participants' current posteriors over
    interpretations and word orders,
    and returns the updated participants' posteriors.

    NOTE: This function calculates everything for *all participants* for one trial,
    so it might differ from the functions above, which are written for one participant.

    TODO: Is it possible to rewrite this to do all trials at once efficiently?
    Maybe some clever cumulative operation?

    Parameters
    ----------
    scene: array
        dims (participant, 3)
        Description of the scene that the signal describes.
        A single scene is described by (agent, action, patient).
    signal: array
        dims (participant, 3)
        The three columns report the indices of the signals.
    interpretation_weights_original: array
        dims: (participant, signal, meaning)
        shape: (# participants, 7, 7)
        Each row is the prob of the signal referring to each
        meaning. Gets updated in this function in light
        of evidence given by scene/signal combos.
    word_order_weights_original: array
        dims: (participant, word order)
        shape: (# participants, 6)
    word_orders: array
        dims: (word order, 3)
        shape: (6, 3)
        Word orders (see functions above for description).
    learning_weights: array
        dims: (participant)
        The learning weight of each participant.

    Returns
    -------
    tuple of arrays
        interpretation_weights: array
            Same shape as interpretation_weights_original
            NOTE: normalized before returning
        word_order_weights: array
            Same shape as word_order_weights_original
            NOTE: normalized before returning
    """
    n_parti = interpretation_weights_original.shape[0]

    ### Snippet A (update interpretation weights)
    # get one-hot vectors for the indices as described in the notebook
    # in which to add the calculated weights that should be added.
    # Dimensions (participant, scene component, word order, sentence position)
    # shape: (# participants, 3, 6, 3)
    mask_orders = tt.eq(
        tt.tile(word_orders, (n_parti, 3, 1, 1)),
        np.array([0, 1, 2])[None, :, None, None]
    )
    # Get the values from word_order_weights_original
    # to add to the various elements
    # using the one-hot vectors in mask_orders.
    # values_to_add: shape (# participants, 3, 3)
    # dims (participant, scene components, words in signal)
    # rows: (agent, action, patient)
    # cols: (first word, second word, third word)
    values_to_add = (
        # get the weights compatible with each word order
        (word_order_weights_original[:, None, :, None] * mask_orders)
        # sum over word order dimension
        # to get the probability that it's ANY of those orders
        .sum(2)
    )
    # indicates which participant
    arg1 = tt.tile(tt.arange(n_parti).dimshuffle(0, 'x', 'x'), (1, 3, 3))
    # indicates which signals (rows)
    arg2 = tt.tile(signal.dimshuffle(0, 'x', 1), (1, 3, 1))
    # indicates which meanings (columns)
    arg3 = tt.tile(scene.dimshuffle(0, 1, 'x'), 3)
    # insert values at the right locations,
    # keep 0 everywhere else
    interpretation_to_add = tt.set_subtensor(
        # create an array with shape (participant, signal, meaning)
        tt.zeros(interpretation_weights_original.shape)[
            arg1, arg2, arg3
        ],
        values_to_add
    )
    # Create array with new weights and then normalize
    # in the meaning dimension
    # (get a distribution over meanings given a signal,
    # which makes sense since this is an interpretation matrix)
    # shape: participant, signal, meaning
    interpretation_weights = theano_normalize(
        # add to the original array
        interpretation_weights_original
        + learning_weights[:, None, None] * interpretation_to_add,
        axis=-1
    )

    ### Snippet B (update word order weights)
    # weights to add to the word orders
    word_order_to_add = interpretation_weights_original[
        tt.tile(
            tt.arange(n_parti).dimshuffle(0, 'x', 'x'),
            (1, 6, 3)
        ),
        # for each participant,
        # repeat signal once for each word order.
        # dims (participant, word order, signal)
        tt.tile(
            signal.dimshuffle(0, 'x', 1),
            (1, 6, 1)
        ),
        scene[:, word_orders]
    ].prod(-1)
    word_order_weights = theano_normalize(
        word_order_weights_original
        + learning_weights[:, None] * word_order_to_add,
        1
    )
    return interpretation_weights, word_order_weights
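# In outline, both snippets implement the same additive rule (a sketch, with
# lw the participant's learning weight):
#   new_weights ~ normalize(old_weights + lw * evidence_from_this_trial)
# where the evidence term credits each signal/meaning pair (Snippet A) or
# each word order (Snippet B) in proportion to how consistent it is with
# the observed (scene, signal) combination under the current beliefs.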
def softmax_theano(x, axis):
    # subtract the max before exponentiating for numerical stability
    e_x = tt.exp(x - x.max(axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)
def probs_languages_to_probs_scenes_multiple_participants(
        interpretation_weights, word_order_weights,
        signals, scenes_trials, word_orders, softmax_alphas):
    """
    Finds the probability of choosing each of the four scenes in a trial
    given the observed utterance and the current probabilities
    of word-meaning associations and word orders.

    Parameters
    ----------
    interpretation_weights: array
        Dimensions: (trial, participant, word, meaning)
        NOTE: Trial dimension was added in the scan
    word_order_weights: array
        Dimensions: (trial, participant, word order)
        NOTE: Trial dimension was added in the scan
    signals: array
        Dimensions: (trial, participant, sentence position)
    scenes_trials: array
        Dimensions: (trial, participant, scene, component)
    word_orders: array
        Dimensions: (word order, sentence position)
    softmax_alphas: None or array
        Dimensions: (participant)
        If given, per-participant inverse temperatures for a softmax
        over the scene probabilities; if None, the probabilities are
        simply normalized.
    """
    # meaning that each word would be expressing
    # assuming each word order is true
    # dims (trial, participant, scene in trial, word order, 3), values: meanings
    # e.g., (40, 5, 4, 6, 3)
    meanings_trials_orders = scenes_trials[:, :, :, word_orders]
    # get the probability that the whole sentence
    # observed in each trial expresses
    # each of the scenes in the trial,
    # assuming each word order in turn
    # Dimensions:
    # (trial, participant, scene in trial, word order)
    prob_sentence_by_trial_given_order = (
        interpretation_weights[
            tt.arange(interpretation_weights.shape[0])[:, None, None, None, None],
            tt.arange(interpretation_weights.shape[1])[None, :, None, None, None],
            signals[:, :, None, None, :],
            meanings_trials_orders
        ]
        # get joint probability of the observed words
        # (for each trial, word order and scene)
        .prod(-1)
    )
    # Marginalize out word order
    # to get probs of observed words in scene
    # Dimensions: (trial, participant, scene in trial)
    unnormalized_probs = (
        prob_sentence_by_trial_given_order
        # multiply by probability of word orders
        * word_order_weights.dimshuffle(0, 1, 'x', 2)
    ).sum(-1)
    if softmax_alphas is None:
        prodprobs = theano_normalize(
            unnormalized_probs,
            2
        )
    else:
        prodprobs = softmax_theano(
            unnormalized_probs * softmax_alphas[:, None],
            -1
        )
    return prodprobs
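# The returned array has dims (trial, participant, scene in trial), with
# probabilities summing to 1 over the scene dimension, so it can be fed
# directly to a Categorical likelihood over the choices (as done below).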
def factory_weight_model_multiple_participants(true_scenes_trials, signals, word_orders,
                                               history_choices_indices, scenes_trials):
    """
    Build the PyMC model.

    Parameters
    ----------
    history_choices_indices: None or array
        If None, the model is defined without observed choices
        (e.g., for prior predictive sampling).
    """
    # with all these dimensions I've decided to be careful
    # and explicitly write down all the shapes
    n_trials, n_participants, n_scenes, _ = scenes_trials.shape
    coords = {
        "trial": np.arange(n_trials),
        "participant": np.arange(n_participants),
        "scene": np.arange(n_scenes),
        "component": ['agent', 'action', 'patient'],
        "word_order": np.arange(6),
        "sentence_position": np.arange(3)
    }
    with pm.Model(coords=coords) as model:

        ##### Create data
        true_scenes_trials_data = pm.Data(
            'true_scenes_trials',
            true_scenes_trials,
            dims=('trial', 'participant', 'component'),
            mutable=True
        )
        signals_data = pm.Data(
            'signals',
            signals,
            dims=('trial', 'participant', 'sentence_position'),
            mutable=True
        )
        word_orders_data = pm.Data(
            'word_orders',
            word_orders,
            dims=('word_order', 'sentence_position'),
            mutable=True
        )
        scenes_trials_data = pm.Data(
            'scenes_trials',
            scenes_trials,
            dims=('trial', 'participant', 'scene', 'component'),
            mutable=True
        )
        if history_choices_indices is None:
            history_choices_indices_data = None
        else:
            history_choices_indices_data = pm.Data(
                'history_choices_indices',
                history_choices_indices,
                dims=('trial', 'participant')
            )

        ####### Define priors
        # sample a population-level hyperprior
        # over the individual parameters of the Dirichlet following this:
        # http://tdunning.blogspot.com/2010/04/sampling-dirichlet-distribution-revised.html
        gammas = pm.Gamma(
            'hyper_gammas',
            alpha=5,
            beta=2,
            dims='word_order'
        )
        hyper_alpha = pm.Deterministic(
            'hyper_alpha',
            gammas.sum()
        )
        # shape (# participants, 6)
        # dims (participant, word order)
        word_order_weights = pm.Dirichlet(
            'word_order_prior',
            gammas,
            dims=('participant', 'word_order')
        )
        # shape (# participants)
        learning_weights = pm.Gamma(
            'learning_weights',
            alpha=8.,
            beta=15.,
            dims='participant'
        )
        softmax_alphas = pm.Exponential(
            'softmax_alpha',
            lam=0.5,
            dims='participant'
        )

        ####### likelihood part
        # The participants' marginal prior over
        # interpretation weights is fixed in advance
        # to be uniform, rather than estimated.
        # dims (participant, word, meaning)
        interpretation_weights = theano_normalize(
            tt.ones((n_participants, 7, 7)),
            axis=1
        )
        # scan is theano's way to do looping:
        # iterate the weight update over trials, carrying along
        # the probs of each word-meaning connection
        # and each word order respectively
        (probs_interpretations, probs_word_orders), updates = theano.scan(
            update_weights_theano_multiple_participants,
            sequences=[
                true_scenes_trials_data,
                signals_data
            ],
            outputs_info=[
                interpretation_weights,
                word_order_weights
            ],
            non_sequences=[
                word_orders_data,
                learning_weights
            ]
        )
        # dims (trial, participant, scenes in trial)
        # where there are always 4 scenes in the trial
        prob_scenes = probs_languages_to_probs_scenes_multiple_participants(
            probs_interpretations,
            probs_word_orders,
            signals_data,
            scenes_trials_data,
            word_orders_data,
            softmax_alphas
        )
        pm.Categorical(
            'chosen_scenes',
            p=prob_scenes,
            observed=history_choices_indices_data,
            dims=('trial', 'participant')
        )
    return model
def prior_predictive_sample(n_trials, n_participants):
    simulated_results = simulate_full_experiment(
        n_trials,
        n_participants
    )
    scenes, languages, language_interpret, word_orders = define_objects(
        full_output=True
    )
    model = factory_weight_model_multiple_participants(
        true_scenes_trials=simulated_results['true_scenes_trials'],
        signals=simulated_results['signals'],
        scenes_trials=simulated_results['scenes_trials'],
        word_orders=word_orders,
        history_choices_indices=None
    )
    with model:
        simulated_data = pm.sample_prior_predictive(samples=1)
    return simulated_results, simulated_data
if __name__ == '__main__':
    # get some samples to run inference with
    simulated_results, data = prior_predictive_sample(
        n_trials=10,
        n_participants=10
    )
    analysis_arrays = data.constant_data
    # chosen scenes from the single prior draw (chain 0, draw 0)
    observed = data.prior.chosen_scenes.values[0, 0]
    _, _, _, word_orders = define_objects(
        full_output=True
    )
    model = factory_weight_model_multiple_participants(
        true_scenes_trials=analysis_arrays['true_scenes_trials'],
        signals=analysis_arrays['signals'],
        word_orders=word_orders,
        history_choices_indices=observed,
        scenes_trials=analysis_arrays['scenes_trials'],
    )
    with model:
        trace = pm.sample()
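    # A quick posterior check could follow here (a sketch; arviz is already
    # imported above and these variable names exist in the model):
    # print(az.summary(trace, var_names=['learning_weights', 'softmax_alpha']))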