Skip to content

Instantly share code, notes, and snippets.

@DanielWeitzenfeld
Created December 5, 2016 14:31
Show Gist options
  • Save DanielWeitzenfeld/d9ac64f76281e6c1d29217af76449664 to your computer and use it in GitHub Desktop.
Save DanielWeitzenfeld/d9ac64f76281e6c1d29217af76449664 to your computer and use it in GitHub Desktop.
functional implementation of ordinal predicted variable
import seaborn as sns
import pymc3 as pm
import numpy as np
from scipy.stats import norm
import pandas as pd
import theano.tensor as T
from theano.compile.ops import as_op
# Generate True data
n = 500
theta_true = [1.5, 2.5, 3.5, 4.5, 5.5, 6.5]
mu_true = 1
sigma_true = 2.5
true_underlying = np.random.normal(mu_true, sigma_true, size=n)
true_bucketed = np.digitize(true_underlying, theta_true)
# we want to fix the top and bottom thresholds, but let the others 'move'. Use a masked array for simplicity.
n_y_levels = len(theta_true) + 1
thresh_mus = [k + .5 for k in range(1, n_y_levels)]
thresh_mus_observed = np.array(thresh_mus)
thresh_mus_observed[1:-1] = -999
thresh_mus_observed = np.ma.masked_values(thresh_mus_observed, value=-999)
@as_op(itypes=[T.dvector, T.dscalar, T.dscalar], otypes=[T.dvector])
def bucket_probabilities(theta, mu, sigma):
"""Given cutpoints and parameters of normal distribution, calculate probabilities for each ordinal outcome."""
out = np.empty(n_y_levels)
n = norm(loc=mu, scale=sigma)
out[0] = n.cdf(theta[0])
out[1] = np.max([0, n.cdf(theta[1]) - n.cdf(theta[0])])
out[2] = np.max([0, n.cdf(theta[2]) - n.cdf(theta[1])])
out[3] = np.max([0, n.cdf(theta[3]) - n.cdf(theta[2])])
out[4] = np.max([0, n.cdf(theta[4]) - n.cdf(theta[3])])
out[5] = np.max([0, n.cdf(theta[5]) - n.cdf(theta[4])])
out[6] = 1 - n.cdf(theta[5])
return out
with pm.Model() as ordinal_model:
theta = pm.Normal('theta',
mu=thresh_mus,
tau=np.repeat(.5**2, len(thresh_mus)),
shape=len(thresh_mus),
observed=thresh_mus_observed,
testval=thresh_mus[1:-1])
mu = pm.Normal('mu',
mu=n_y_levels/2.0,
tau=1.0/(n_y_levels**2),
testval=1.0)
sigma = pm.Uniform('sigma',
n_y_levels/1000.0,
n_y_levels*10.0,
testval=2.5)
pr = bucket_probabilities(theta, mu, sigma)
obs = pm.Categorical('obs', pr, observed=true_bucketed)
# must specify Metropolis because Theano can't compute gradient of bucket_probabilities
step = pm.Metropolis([theta, mu, sigma, pr, obs])
trace = pm.sample(8000, step=step)
@JWarmenhoven
Copy link

JWarmenhoven commented Apr 6, 2017

Nice!
I am also trying to implement this model from Kruschke using PyMC3. Will definitely have a look at yours!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment