Skip to content

Instantly share code, notes, and snippets.

@DanielWeitzenfeld
Created December 3, 2016 22:39
Show Gist options
  • Save DanielWeitzenfeld/91feb45b0a71dec0dbceddabafe23817 to your computer and use it in GitHub Desktop.
Save DanielWeitzenfeld/91feb45b0a71dec0dbceddabafe23817 to your computer and use it in GitHub Desktop.
Ordinal model in pymc3
import pymc3 as pm
import numpy as np
import theano.tensor as T
from theano.compile.ops import as_op
from scipy.stats import norm
# trying to implement a model similar to the one in Doing Bayesian Data Analysis 23.4
# ordinal predicted variable, metric predictor
# generate fake likert-scale data, as generated by 50 different raters
n_raters = 50
n_y_levels = 5
n_ratings = 1000
ratings = np.random.choice([0,1,2,3,4], size=n_ratings)
rater = np.random.choice(np.arange(0, 50, 1), size=n_ratings)
rating_level_covariate = (np.random.random(size=n_ratings) > .5)
@as_op(itypes=[T.dvector, T.dvector, T.dscalar], otypes=[T.dvector])
def bucket_probabilities(theta, mu, sigma):
"""Given cutpoints and parameters of normal distributions, calculate probabilities for each ordinal outcome."""
out = np.empty([n_ratings, n_y_levels])
n = norm(loc=mu, scale=sigma)
out[...,0] = n.cdf(theta[0])
out[...,1] = np.maximum(0, n.cdf(theta[1]) - n.cdf(theta[0]))
out[...,2] = np.maximum(0, n.cdf(theta[2]) - n.cdf(theta[1]))
out[...,3] = np.maximum(0, n.cdf(theta[3]) - n.cdf(theta[2]))
out[...,4] = 1 - n.cdf(theta[3])
return out
# we want to fix the top and bottom thresholds, but let the others 'move'. Use a masked array for simplicity.
thresh_mus = [k + .5 for k in range(1, n_y_levels)]
thresh_mus_observed = np.array(thresh_mus)
thresh_mus_observed[1:-1] = -999
thresh_mus_observed = np.ma.masked_values(thresh_mus_observed, value=-999)
with pm.Model() as ordinal_model:
rater_hyper_mu = pm.Normal('rater_hyper_mu', 3, sd=10)
rater_hyper_tau = pm.Normal('rater_hyper_tau', 3, sd=3)
rater_mus = pm.Normal('rater_mus',
mu=rater_hyper_mu,
tau=rater_hyper_tau,
shape=n_raters,
testval=np.repeat(3.0, n_raters))
theta = pm.Normal('theta',
mu=thresh_mus,
tau=np.repeat(.5 ** 2, len(thresh_mus)),
shape=len(thresh_mus),
observed=thresh_mus_observed,
testval=thresh_mus[1:-1])
sigma = pm.Uniform('sigma',
n_y_levels/1000.0,
n_y_levels*10.0,
testval=2.5)
beta_covariate = pm.Normal('beta_covariate', 0, sd=10)
pr = bucket_probabilities(theta,
rater_mus[rater] + beta_covariate*rating_level_covariate,
sigma)
obs = pm.Categorical('obs', pr, shape=n_ratings, observed=ratings)
trace = pm.sample(3000)
# returns error:
# For compute_test_value, one input test value does not have the requested type.
# The error when converting the test value to that variable type:
# Wrong number of dimensions: expected 1, got 2 with shape (1000, 5).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment