DanielWeitzenfeld/ordinal_pymc3_DBDA23.2.2.py

## ordinal_pymc3_DBDA23.2.2.py
import seaborn as sns
import pymc3 as pm
import numpy as np
from scipy.stats import norm
import pandas as pd

import theano.tensor as T
from theano.compile.ops import as_op

# Generate True data
n = 500
theta_true = [1.5, 2.5, 3.5, 4.5, 5.5, 6.5]
mu_true = 1
sigma_true = 2.5
true_underlying = np.random.normal(mu_true, sigma_true, size=n)
true_bucketed = np.digitize(true_underlying, theta_true)

# we want to fix the top and bottom thresholds, but let the others 'move'.  Use a masked array for simplicity.
n_y_levels = len(theta_true) + 1
thresh_mus = [k + .5 for k in range(1, n_y_levels)]
thresh_mus_observed = np.array(thresh_mus)
thresh_mus_observed[1:-1] = -999
thresh_mus_observed = np.ma.masked_values(thresh_mus_observed, value=-999)

@as_op(itypes=[T.dvector, T.dscalar, T.dscalar], otypes=[T.dvector])
def bucket_probabilities(theta, mu, sigma):
    """Given cutpoints and parameters of normal distribution, calculate probabilities for each ordinal outcome."""
    out = np.empty(n_y_levels)
    n = norm(loc=mu, scale=sigma)
    out[0] = n.cdf(theta[0])
    out[1] = np.max([0, n.cdf(theta[1]) - n.cdf(theta[0])])
    out[2] = np.max([0, n.cdf(theta[2]) - n.cdf(theta[1])])
    out[3] = np.max([0, n.cdf(theta[3]) - n.cdf(theta[2])])
    out[4] = np.max([0, n.cdf(theta[4]) - n.cdf(theta[3])])
    out[5] = np.max([0, n.cdf(theta[5]) - n.cdf(theta[4])])
    out[6] = 1 - n.cdf(theta[5])
    return out

with pm.Model() as ordinal_model:
    theta = pm.Normal('theta',
                      mu=thresh_mus,
                      tau=np.repeat(.5**2, len(thresh_mus)),
                      shape=len(thresh_mus),
                      observed=thresh_mus_observed,
                      testval=thresh_mus[1:-1])
    mu = pm.Normal('mu',
                   mu=n_y_levels/2.0,
                   tau=1.0/(n_y_levels**2),
                   testval=1.0)
    sigma = pm.Uniform('sigma',
                       n_y_levels/1000.0,
                       n_y_levels*10.0,
                       testval=2.5)

    pr = bucket_probabilities(theta, mu, sigma)

    obs = pm.Categorical('obs', pr, observed=true_bucketed)

    # must specify Metropolis because Theano can't compute gradient of bucket_probabilities
    step = pm.Metropolis([theta, mu, sigma, pr, obs])
    trace = pm.sample(8000, step=step)
	import seaborn as sns
	import pymc3 as pm
	import numpy as np
	from scipy.stats import norm
	import pandas as pd

	import theano.tensor as T
	from theano.compile.ops import as_op

	# Generate True data
	n = 500
	theta_true = [1.5, 2.5, 3.5, 4.5, 5.5, 6.5]
	mu_true = 1
	sigma_true = 2.5
	true_underlying = np.random.normal(mu_true, sigma_true, size=n)
	true_bucketed = np.digitize(true_underlying, theta_true)

	# we want to fix the top and bottom thresholds, but let the others 'move'. Use a masked array for simplicity.
	n_y_levels = len(theta_true) + 1
	thresh_mus = [k + .5 for k in range(1, n_y_levels)]
	thresh_mus_observed = np.array(thresh_mus)
	thresh_mus_observed[1:-1] = -999
	thresh_mus_observed = np.ma.masked_values(thresh_mus_observed, value=-999)

	@as_op(itypes=[T.dvector, T.dscalar, T.dscalar], otypes=[T.dvector])
	def bucket_probabilities(theta, mu, sigma):
	"""Given cutpoints and parameters of normal distribution, calculate probabilities for each ordinal outcome."""
	out = np.empty(n_y_levels)
	n = norm(loc=mu, scale=sigma)
	out[0] = n.cdf(theta[0])
	out[1] = np.max([0, n.cdf(theta[1]) - n.cdf(theta[0])])
	out[2] = np.max([0, n.cdf(theta[2]) - n.cdf(theta[1])])
	out[3] = np.max([0, n.cdf(theta[3]) - n.cdf(theta[2])])
	out[4] = np.max([0, n.cdf(theta[4]) - n.cdf(theta[3])])
	out[5] = np.max([0, n.cdf(theta[5]) - n.cdf(theta[4])])
	out[6] = 1 - n.cdf(theta[5])
	return out

	with pm.Model() as ordinal_model:
	theta = pm.Normal('theta',
	mu=thresh_mus,
	tau=np.repeat(.5**2, len(thresh_mus)),
	shape=len(thresh_mus),
	observed=thresh_mus_observed,
	testval=thresh_mus[1:-1])
	mu = pm.Normal('mu',
	mu=n_y_levels/2.0,
	tau=1.0/(n_y_levels**2),
	testval=1.0)
	sigma = pm.Uniform('sigma',
	n_y_levels/1000.0,
	n_y_levels*10.0,
	testval=2.5)

	pr = bucket_probabilities(theta, mu, sigma)

	obs = pm.Categorical('obs', pr, observed=true_bucketed)

	# must specify Metropolis because Theano can't compute gradient of bucket_probabilities
	step = pm.Metropolis([theta, mu, sigma, pr, obs])
	trace = pm.sample(8000, step=step)