Created
December 3, 2016 22:39
-
-
Save DanielWeitzenfeld/91feb45b0a71dec0dbceddabafe23817 to your computer and use it in GitHub Desktop.
Ordinal model in pymc3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pymc3 as pm | |
import numpy as np | |
import theano.tensor as T | |
from theano.compile.ops import as_op | |
from scipy.stats import norm | |
# trying to implement a model similar to the one in Doing Bayesian Data Analysis 23.4 | |
# ordinal predicted variable, metric predictor | |
# generate fake likert-scale data, as generated by 50 different raters | |
n_raters = 50 | |
n_y_levels = 5 | |
n_ratings = 1000 | |
ratings = np.random.choice([0,1,2,3,4], size=n_ratings) | |
rater = np.random.choice(np.arange(0, 50, 1), size=n_ratings) | |
rating_level_covariate = (np.random.random(size=n_ratings) > .5) | |
@as_op(itypes=[T.dvector, T.dvector, T.dscalar], otypes=[T.dvector]) | |
def bucket_probabilities(theta, mu, sigma): | |
"""Given cutpoints and parameters of normal distributions, calculate probabilities for each ordinal outcome.""" | |
out = np.empty([n_ratings, n_y_levels]) | |
n = norm(loc=mu, scale=sigma) | |
out[...,0] = n.cdf(theta[0]) | |
out[...,1] = np.maximum(0, n.cdf(theta[1]) - n.cdf(theta[0])) | |
out[...,2] = np.maximum(0, n.cdf(theta[2]) - n.cdf(theta[1])) | |
out[...,3] = np.maximum(0, n.cdf(theta[3]) - n.cdf(theta[2])) | |
out[...,4] = 1 - n.cdf(theta[3]) | |
return out | |
# we want to fix the top and bottom thresholds, but let the others 'move'. Use a masked array for simplicity. | |
thresh_mus = [k + .5 for k in range(1, n_y_levels)] | |
thresh_mus_observed = np.array(thresh_mus) | |
thresh_mus_observed[1:-1] = -999 | |
thresh_mus_observed = np.ma.masked_values(thresh_mus_observed, value=-999) | |
with pm.Model() as ordinal_model: | |
rater_hyper_mu = pm.Normal('rater_hyper_mu', 3, sd=10) | |
rater_hyper_tau = pm.Normal('rater_hyper_tau', 3, sd=3) | |
rater_mus = pm.Normal('rater_mus', | |
mu=rater_hyper_mu, | |
tau=rater_hyper_tau, | |
shape=n_raters, | |
testval=np.repeat(3.0, n_raters)) | |
theta = pm.Normal('theta', | |
mu=thresh_mus, | |
tau=np.repeat(.5 ** 2, len(thresh_mus)), | |
shape=len(thresh_mus), | |
observed=thresh_mus_observed, | |
testval=thresh_mus[1:-1]) | |
sigma = pm.Uniform('sigma', | |
n_y_levels/1000.0, | |
n_y_levels*10.0, | |
testval=2.5) | |
beta_covariate = pm.Normal('beta_covariate', 0, sd=10) | |
pr = bucket_probabilities(theta, | |
rater_mus[rater] + beta_covariate*rating_level_covariate, | |
sigma) | |
obs = pm.Categorical('obs', pr, shape=n_ratings, observed=ratings) | |
trace = pm.sample(3000) | |
# returns error: | |
# For compute_test_value, one input test value does not have the requested type. | |
# The error when converting the test value to that variable type: | |
# Wrong number of dimensions: expected 1, got 2 with shape (1000, 5). |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment