DanielWeitzenfeld/ordinal_pymc3.py

## ordinal_pymc3.py
import pymc3 as pm
import numpy as np
import theano.tensor as T
from theano.compile.ops import as_op
from scipy.stats import norm

# trying to implement a model similar to the one in Doing Bayesian Data Analysis 23.4
# ordinal predicted variable, metric predictor

# generate fake likert-scale data, as generated by 50 different raters
n_raters = 50
n_y_levels = 5
n_ratings = 1000
ratings = np.random.choice([0,1,2,3,4], size=n_ratings)
rater = np.random.choice(np.arange(0, 50, 1), size=n_ratings)
rating_level_covariate = (np.random.random(size=n_ratings) > .5)

@as_op(itypes=[T.dvector, T.dvector, T.dscalar], otypes=[T.dvector])
def bucket_probabilities(theta, mu, sigma):
    """Given cutpoints and parameters of normal distributions, calculate probabilities for each ordinal outcome."""
    out = np.empty([n_ratings, n_y_levels])
    n = norm(loc=mu, scale=sigma)
    out[...,0] = n.cdf(theta[0])
    out[...,1] = np.maximum(0, n.cdf(theta[1]) - n.cdf(theta[0]))
    out[...,2] = np.maximum(0, n.cdf(theta[2]) - n.cdf(theta[1]))
    out[...,3] = np.maximum(0, n.cdf(theta[3]) - n.cdf(theta[2]))
    out[...,4] = 1 - n.cdf(theta[3])
    return out

# we want to fix the top and bottom thresholds, but let the others 'move'.  Use a masked array for simplicity.
thresh_mus = [k + .5 for k in range(1, n_y_levels)]
thresh_mus_observed = np.array(thresh_mus)
thresh_mus_observed[1:-1] = -999
thresh_mus_observed = np.ma.masked_values(thresh_mus_observed, value=-999)

with pm.Model() as ordinal_model:

    rater_hyper_mu = pm.Normal('rater_hyper_mu', 3, sd=10)
    rater_hyper_tau = pm.Normal('rater_hyper_tau', 3, sd=3)
    rater_mus = pm.Normal('rater_mus',
                          mu=rater_hyper_mu,
                          tau=rater_hyper_tau,
                          shape=n_raters,
                          testval=np.repeat(3.0, n_raters))

    theta = pm.Normal('theta',
                      mu=thresh_mus,
                      tau=np.repeat(.5 ** 2, len(thresh_mus)),
                      shape=len(thresh_mus),
                      observed=thresh_mus_observed,
                      testval=thresh_mus[1:-1])

    sigma = pm.Uniform('sigma',
                       n_y_levels/1000.0,
                       n_y_levels*10.0,
                       testval=2.5)

    beta_covariate = pm.Normal('beta_covariate', 0, sd=10)

    pr = bucket_probabilities(theta,
                              rater_mus[rater] + beta_covariate*rating_level_covariate,
                              sigma)

    obs = pm.Categorical('obs', pr, shape=n_ratings, observed=ratings)

    trace = pm.sample(3000)

# returns error:
# For compute_test_value, one input test value does not have the requested type.
# The error when converting the test value to that variable type:
# Wrong number of dimensions: expected 1, got 2 with shape (1000, 5).
	import pymc3 as pm
	import numpy as np
	import theano.tensor as T
	from theano.compile.ops import as_op
	from scipy.stats import norm

	# trying to implement a model similar to the one in Doing Bayesian Data Analysis 23.4
	# ordinal predicted variable, metric predictor

	# generate fake likert-scale data, as generated by 50 different raters
	n_raters = 50
	n_y_levels = 5
	n_ratings = 1000
	ratings = np.random.choice([0,1,2,3,4], size=n_ratings)
	rater = np.random.choice(np.arange(0, 50, 1), size=n_ratings)
	rating_level_covariate = (np.random.random(size=n_ratings) > .5)

	@as_op(itypes=[T.dvector, T.dvector, T.dscalar], otypes=[T.dvector])
	def bucket_probabilities(theta, mu, sigma):
	"""Given cutpoints and parameters of normal distributions, calculate probabilities for each ordinal outcome."""
	out = np.empty([n_ratings, n_y_levels])
	n = norm(loc=mu, scale=sigma)
	out[...,0] = n.cdf(theta[0])
	out[...,1] = np.maximum(0, n.cdf(theta[1]) - n.cdf(theta[0]))
	out[...,2] = np.maximum(0, n.cdf(theta[2]) - n.cdf(theta[1]))
	out[...,3] = np.maximum(0, n.cdf(theta[3]) - n.cdf(theta[2]))
	out[...,4] = 1 - n.cdf(theta[3])
	return out

	# we want to fix the top and bottom thresholds, but let the others 'move'. Use a masked array for simplicity.
	thresh_mus = [k + .5 for k in range(1, n_y_levels)]
	thresh_mus_observed = np.array(thresh_mus)
	thresh_mus_observed[1:-1] = -999
	thresh_mus_observed = np.ma.masked_values(thresh_mus_observed, value=-999)

	with pm.Model() as ordinal_model:

	rater_hyper_mu = pm.Normal('rater_hyper_mu', 3, sd=10)
	rater_hyper_tau = pm.Normal('rater_hyper_tau', 3, sd=3)
	rater_mus = pm.Normal('rater_mus',
	mu=rater_hyper_mu,
	tau=rater_hyper_tau,
	shape=n_raters,
	testval=np.repeat(3.0, n_raters))

	theta = pm.Normal('theta',
	mu=thresh_mus,
	tau=np.repeat(.5 ** 2, len(thresh_mus)),
	shape=len(thresh_mus),
	observed=thresh_mus_observed,
	testval=thresh_mus[1:-1])

	sigma = pm.Uniform('sigma',
	n_y_levels/1000.0,
	n_y_levels*10.0,
	testval=2.5)

	beta_covariate = pm.Normal('beta_covariate', 0, sd=10)

	pr = bucket_probabilities(theta,
	rater_mus[rater] + beta_covariate*rating_level_covariate,
	sigma)

	obs = pm.Categorical('obs', pr, shape=n_ratings, observed=ratings)

	trace = pm.sample(3000)

	# returns error:
	# For compute_test_value, one input test value does not have the requested type.
	# The error when converting the test value to that variable type:
	# Wrong number of dimensions: expected 1, got 2 with shape (1000, 5).