Skip to content

Instantly share code, notes, and snippets.

@sammosummo
Created September 13, 2016 18:41
Show Gist options
  • Save sammosummo/7e3ba00ea66063616a1417d447479a4e to your computer and use it in GitHub Desktop.
Bayesian model of working-memory capacity.
"""Model of working-memory capacity for change-detection tasks.
This script implements a "slots-based" model of working memory capacity based
on the Cowan formula, modified to allow lapses and formalized in a Bayesian
framework.
Data are organised into a pandas data frame in which each row represents a
subject, and columns contain covariates or observations. "Covariates" can be
categorical or continuous. "Observations" are counts of trials and are named
`{x}_{y}` where `{x}` is a single uppercase letter indicating the count type
(either `S`, `D`, `F`, or `H`) and `{y}` is a positive integer indicating the
set size. Covariates should not have names that could be confused with
observations.
The code is designed to work on pure "between-subjects" designs; i.e., a
subject is assumed to have one set of parameter values for all trials in the
experiment. It is not optimal for "within-subjects" designs.
"""
from inspect import signature
import numpy as np
import pymc3 as pm
import theano.tensor as tt
from theano import dot
from patsy import dmatrix
def _build_prior(spec, name):
    """Instantiate the prior described by `spec`, keeping only the keyword
    arguments its constructor actually accepts.

    Args:
        spec (dict): Must contain a `'prior'` key (a pymc3 distribution
            class); remaining items are candidate constructor kwargs.
        name (str): pymc3 variable name for the new random variable.

    Returns:
        The instantiated pymc3 random variable.
    """
    kwargs = dict(spec)
    prior = kwargs.pop('prior')
    accepted = signature(prior.__init__).parameters
    # Drop any hyperparameters the chosen distribution does not understand,
    # so callers can pass a superset (e.g. swap Normal for HalfCauchy).
    kwargs = {k: v for k, v in kwargs.items() if k in accepted}
    kwargs['name'] = name
    return prior(**kwargs)


def create_family(fn, dm, lh=None, sh=None, robust=False):
    """Return a hierarchical family of random variables.

    Families comprise a collection of children (e.g., subject-level)
    variables, and parent variables that control the shape of the prior
    distribution from which the children are drawn.

    Args:
        fn (str): Descriptive name for the family; e.g., `'kappa'`.
        dm (patsy.DesignMatrix): The design matrix for the location
            parameters of the prior distribution on the children variables.
        lh (Optional[dict]): Additional details used in the creation of the
            location parameters. Defaults to `{'prior': pymc3.Normal,
            'mu': 0, 'sd': 100}`.
        sh (Optional[dict]): Additional details used in the creation of the
            scale parameters. Defaults to `{'prior': pymc3.HalfCauchy,
            'beta': 30}`.
        robust (Optional[bool]): If False, subject-level parameters are
            normally distributed. If True, they have a non-central Student t
            distribution, and an additional nu parameter is added to the
            model.

    Returns:
        pymc3 random variable of shape `(n_subjects,)` holding the children
        (subject-level) variables.
    """
    # NOTE: mutable defaults replaced with None sentinels (was `lh={}, sh={}`).
    lh = {} if lh is None else lh
    sh = {} if sh is None else sh

    # One location hyperprior per design-matrix column.
    # BUG FIX: the original referenced an undefined name `ldm`; the
    # parameter is `dm`.
    clocs = []
    for col in dm.design_info.column_names:
        loc_spec = {'prior': pm.Normal, 'mu': 0, 'sd': 100}
        loc_spec.update(lh)
        clocs.append(_build_prior(loc_spec, 'loc_%s__%s' % (fn, col)))
    # Per-subject location: design matrix times stacked column effects.
    loc = dot(np.asarray(dm), tt.stack(*clocs))

    # Single shared scale hyperprior for the family.
    scale_spec = {'prior': pm.HalfCauchy, 'beta': 30}
    scale_spec.update(sh)
    sc = _build_prior(scale_spec, 'scale_%s' % fn)

    # Children priors: one variable per subject (row of the design matrix).
    shape = (dm.shape[0],)
    if robust:
        # Shifted exponential keeps nu > 1 so the t distribution has a mean.
        nu = pm.Exponential(name='nu_minus_one_%s' % fn, lam=1 / 29.) + 1
        children = pm.StudentT(name=fn, mu=loc, sd=sc, nu=nu, shape=shape)
    else:
        children = pm.Normal(name=fn, mu=loc, sd=sc, shape=shape)
    return children
def make_model(data, formulae, robust=False):
    """Construct the model inside the current pymc3 model context.

    Args:
        data (pandas.DataFrame): Data formatted in the appropriate way; see
            the module docstring for the `S_/D_/F_/H_` column convention.
        formulae (dict): Dictionary of patsy formulae keyed by parameter name
            (`'kappa'`, `'gamma'`, `'zeta'`). An intercept-only formula is
            used for any that are missing. The caller's dict is not mutated.
        robust (Optional[bool]): Robust estimation. Default is False.

    Returns:
        None: Variables are registered with the pymc3 model in context,
        ready for sampling.
    """
    # BUG FIX: work on a copy so the caller's dict is not mutated.
    formulae = dict(formulae)
    for name in ('kappa', 'gamma', 'zeta'):
        formulae.setdefault(name, '1')
    dm = {k: dmatrix(f, data) for k, f in formulae.items()}

    # Subject-level latent parameters (unbounded scale).
    kappa = create_family('kappa', dm['kappa'], robust=robust)
    gamma = create_family('gamma', dm['gamma'], robust=robust)
    zeta = create_family('zeta', dm['zeta'], robust=robust)

    n = len(data)
    # Transform to the model's natural scales: capacity k >= 0, guess rate
    # g and non-lapse probability z in (0, 1).
    k = pm.Deterministic('k', tt.max((kappa, np.zeros(n)), axis=0))
    g = pm.Deterministic('g', pm.invlogit(gamma))
    z = pm.Deterministic('z', pm.invlogit(zeta))

    # BUG FIX: `'S_' in c` was a substring test that could match unrelated
    # columns (e.g. a covariate named 'RS_1'); require the 'S_' prefix.
    set_sizes = [int(c[len('S_'):]) for c in data.columns
                 if c.startswith('S_')]
    for l in set_sizes:
        # Cowan-formula probability that the probed item is in memory.
        r = tt.min((k / l, np.ones(n)), axis=0)
        # False-alarm and hit probabilities with lapse mixture.
        f = (1 - z) * g + z * (1 - r) * g
        h = (1 - z) * g + z * r + z * (1 - r) * g
        pm.Binomial(
            name='F_%i' % l, p=f, n=data['S_%i' % l].values,
            observed=data['F_%i' % l].values
        )
        pm.Binomial(
            name='H_%i' % l, p=h, n=data['D_%i' % l].values,
            observed=data['H_%i' % l].values
        )
def sample_model(model_name, data, formulae, n, robust=False):
    """Build the model and draw from its posterior.

    Args:
        model_name (str): Directory name for the text trace backend.
        data (pandas.DataFrame): Data formatted in the appropriate way.
        formulae (dict): Dictionary of patsy formulae; see `make_model`.
        n (int): Number of posterior samples to draw.
        robust (Optional[bool]): Robust estimation; passed to `make_model`.
            Default is False.

    Returns:
        None: The trace is written to disk by the text backend.
    """
    with pm.Model():
        # BUG FIX: the original called make_model(data, formulae, True,
        # False, False) — five positional args to a three-parameter
        # function, which raised TypeError.
        make_model(data, formulae, robust=robust)
        backend = pm.backends.Text(model_name)
        pm.sample(n, trace=backend)
if __name__ == '__main__':
    # No command-line interface: import this module and call
    # sample_model() with your data frame and formulae.
    pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment