Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save usptact/3a45d61fe778b646ba7ed8a5f8e89dee to your computer and use it in GitHub Desktop.
Save usptact/3a45d61fe778b646ba7ed8a5f8e89dee to your computer and use it in GitHub Desktop.
functional implementation of ordinal predicted variable
import seaborn as sns
import pymc3 as pm
import numpy as np
from scipy.stats import norm
import pandas as pd
import theano.tensor as T
from theano.compile.ops import as_op
# Generate True data
n = 500
theta_true = [1.5, 2.5, 3.5, 4.5, 5.5, 6.5]
mu_true = 1
sigma_true = 2.5
true_underlying = np.random.normal(mu_true, sigma_true, size=n)
true_bucketed = np.digitize(true_underlying, theta_true)
# we want to fix the top and bottom thresholds, but let the others 'move'. Use a masked array for simplicity.
n_y_levels = len(theta_true) + 1
thresh_mus = [k + .5 for k in range(1, n_y_levels)]
thresh_mus_observed = np.array(thresh_mus)
thresh_mus_observed[1:-1] = -999
thresh_mus_observed = np.ma.masked_values(thresh_mus_observed, value=-999)
@as_op(itypes=[T.dvector, T.dscalar, T.dscalar], otypes=[T.dvector])
def bucket_probabilities(theta, mu, sigma):
"""Given cutpoints and parameters of normal distribution, calculate probabilities for each ordinal outcome."""
out = np.empty(n_y_levels)
n = norm(loc=mu, scale=sigma)
out[0] = n.cdf(theta[0])
out[1] = np.max([0, n.cdf(theta[1]) - n.cdf(theta[0])])
out[2] = np.max([0, n.cdf(theta[2]) - n.cdf(theta[1])])
out[3] = np.max([0, n.cdf(theta[3]) - n.cdf(theta[2])])
out[4] = np.max([0, n.cdf(theta[4]) - n.cdf(theta[3])])
out[5] = np.max([0, n.cdf(theta[5]) - n.cdf(theta[4])])
out[6] = 1 - n.cdf(theta[5])
return out
with pm.Model() as ordinal_model:
theta = pm.Normal('theta',
mu=thresh_mus,
tau=np.repeat(.5**2, len(thresh_mus)),
shape=len(thresh_mus),
observed=thresh_mus_observed,
testval=thresh_mus[1:-1])
mu = pm.Normal('mu',
mu=n_y_levels/2.0,
tau=1.0/(n_y_levels**2),
testval=1.0)
sigma = pm.Uniform('sigma',
n_y_levels/1000.0,
n_y_levels*10.0,
testval=2.5)
pr = bucket_probabilities(theta, mu, sigma)
obs = pm.Categorical('obs', pr, observed=true_bucketed)
# must specify Metropolis because Theano can't compute gradient of bucket_probabilities
step = pm.Metropolis([theta, mu, sigma, pr, obs])
trace = pm.sample(8000, step=step)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment