@fasiha · created February 4, 2020
Attempting to create a Gb1Binomial distribution in PyMC
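This gist has three pieces: a driver script that simulates data and fits it with PyMC3, the `gb1binomial` module the driver imports (defining the custom `Gb1Binomial` distribution), and a standalone NumPy/SciPy sanity check of the log-PMF. The generative model throughout: draw B ~ Beta(α, β), set p = B^δ (a power of a beta variate, i.e., a generalized beta of the first kind, hence "GB1"), then draw k ~ Binomial(n, p).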
import numpy as np
import theano.tensor as tt
import pymc3 as pm
from scipy.stats import beta, binom


def genGb1Binom(Ndata):
    a = 3.3
    b = 4.4
    d = .333
    n = np.random.randint(1, 20, size=Ndata)
    p = beta.rvs(a, b, size=Ndata)**d
    return (binom.rvs(n, p), n)


Ndata = 1000
obs, trials = genGb1Binom(Ndata)

import gb1binomial

with pm.Model() as model:
    a = pm.Uniform('a', lower=1, upper=10)
    b = pm.Uniform('b', lower=1, upper=10)
    d = pm.Uniform('d', lower=.01, upper=10)
    x = gb1binomial.Gb1Binomial('x',
                                observed=obs,
                                n=trials,
                                alpha=a,
                                beta=b,
                                delta=d)
    trace = pm.sample(2000, tune=1000, cores=2)

df = pm.trace_to_dataframe(trace)
print(df.describe())
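If the custom `logp` is right, the posterior summaries printed by `df.describe()` should concentrate near the generating values a = 3.3, b = 4.4, and d = 0.333; the gist records no output, so treat that as the intended check rather than a reported result.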
# gb1binomial.py: the module imported by the driver script above
import numpy as np
from scipy import stats
import theano
import theano.tensor as tt
from pymc3 import Discrete
from pymc3.theanof import floatX, intX
from pymc3.math import tround
from pymc3.distributions.dist_math import bound, binomln, betaln
from pymc3.distributions import draw_values, generate_samples
from pymc3.util import get_variable_name
class Gb1Binomial(Discrete):
    "Adaptation of PyMC3's BetaBinomial to p = (beta draw)**delta"

    def __init__(self, alpha, beta, delta, n, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.alpha = alpha = tt.as_tensor_variable(floatX(alpha))
        self.beta = beta = tt.as_tensor_variable(floatX(beta))
        self.delta = delta = tt.as_tensor_variable(floatX(delta))
        self.n = n = tt.as_tensor_variable(intX(n))
        # self.mode = tt.cast(tround(alpha / (alpha + beta)), 'int8')  # ??

    def _random(self, alpha, beta, n, delta, size=None):
        size = size or 1
        p = stats.beta.rvs(a=alpha, b=beta, size=size).flatten()
        # Sometimes scipy.beta returns nan. Ugh.
        while np.any(np.isnan(p)):
            i = np.isnan(p)
            p[i] = stats.beta.rvs(a=alpha, b=beta, size=np.sum(i))
        # Sigh...
        _n, _p, _size = np.atleast_1d(n).flatten(), p.flatten(), p.shape[0]
        _p = _p**delta
        quotient, remainder = divmod(_p.shape[0], _n.shape[0])
        if remainder != 0:
            raise TypeError(
                'n has a bad size! Was cast to {}, must evenly divide {}'.format(
                    _n.shape[0], _p.shape[0]))
        if quotient != 1:
            _n = np.tile(_n, quotient)
        samples = np.reshape(stats.binom.rvs(n=_n, p=_p, size=_size), size)
        return samples

    def random(self, point=None, size=None):
        alpha, beta, n, delta = \
            draw_values([self.alpha, self.beta, self.n, self.delta], point=point, size=size)
        return generate_samples(self._random,
                                alpha=alpha,
                                beta=beta,
                                n=n,
                                delta=delta,
                                dist_shape=self.shape,
                                size=size)

    def logp(self, value):
        def mapper(n, k, alpha, beta, delta):
            # Log-magnitudes and signs of the alternating series for
            # E[B**(delta*k) * (1 - B**delta)**(n-k)], B ~ Beta(alpha, beta)
            i = tt.arange(n - k + 1.0)
            mags = binomln(n - k, i) + betaln(delta * (i + k) + alpha, beta)
            signs = (-tt.ones_like(i))**i  # (-1.0)**i
            return binomln(n, k) - betaln(alpha, beta) + logsumexp(mags, signs)

        return bound(
            theano.map(mapper,
                       sequences=[self.n, value],
                       non_sequences=[self.alpha, self.beta, self.delta])[0],
            value >= 0, value <= self.n, self.alpha > 0, self.beta > 0,
            self.delta > 0)

    def _repr_latex_(self, name=None, dist=None):
        if dist is None:
            dist = self
        alpha = dist.alpha
        beta = dist.beta
        delta = dist.delta
        name = r'\text{%s}' % name
        return r'${} \sim \text{{Gb1Binomial}}(\mathit{{alpha}}={},~\mathit{{beta}}={},~\mathit{{delta}}={})$'.format(
            name, get_variable_name(alpha), get_variable_name(beta),
            get_variable_name(delta))


def logsumexp(x, signs, axis=None):
    "Adaptation of PyMC's logsumexp, but takes a list of signs like SciPy's"
    x_max = tt.max(x, axis=axis, keepdims=True)
    result = tt.sum(signs * tt.exp(x - x_max), axis=axis, keepdims=True)
    return tt.switch(result >= 0,
                     tt.log(result) + x_max,
                     tt.log(-result) + x_max)
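For reference, here is the identity I believe `logp` implements (my derivation from the code above, not stated in the gist): expand (1 − B^δ)^(n−k) with the binomial theorem and use the beta-function moments E[B^c] = B(α + c, β) / B(α, β) for B ~ Beta(α, β):

P(k \mid n) = \binom{n}{k}\,\mathbb{E}\!\left[B^{\delta k}\,(1 - B^{\delta})^{n-k}\right]
            = \frac{\binom{n}{k}}{B(\alpha,\beta)} \sum_{i=0}^{n-k} (-1)^i \binom{n-k}{i}\, B\!\big(\delta(k+i)+\alpha,\ \beta\big)

The `mags` in `mapper` are the logs of the summands' absolute values and `signs` carries the (−1)^i, which is why the signed `logsumexp` is needed.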
# Standalone NumPy/SciPy sanity check of the same log-PMF (no Theano/PyMC3)
from scipy.stats import beta, binom
from scipy.special import betaln
import numpy as np


def binomln(n, k):
    "Log of the binomial coefficient, https://stackoverflow.com/a/21775412/500207"
    return -betaln(1 + n - k, 1 + k) - np.log(n + 1)


def genGb1Binom(Ndata, ALPHA, BETA, DELTA, N):
    n = np.ones(Ndata, dtype=int) * N
    p = beta.rvs(ALPHA, BETA, size=Ndata)**DELTA
    return binom.rvs(n, p)


ALPHA = 3.3
BETA = 4.4
DELTA = 3.33
N = 5
Ndata = 1_000_000
obs = genGb1Binom(Ndata, ALPHA, BETA, DELTA, N)


def logp(k, alpha, beta, delta, n):
    i = np.arange(n - k + 1.0)
    mags = binomln(n - k, i) + betaln(delta * (i + k) + alpha, beta)
    signs = (-1.0)**i
    return binomln(n, k) - betaln(alpha, beta) + logsumexp(mags, signs)


def logsumexp(x, signs):
    "Adaptation of PyMC's logsumexp, but takes a list of signs like SciPy's"
    x_max = np.max(x)
    result = np.sum(signs * np.exp(x - x_max))
    return np.log(result if result >= 0 else -result) + x_max


def makeEmpiricalPmf(v):
    m = np.max(v)
    d = np.zeros(1 + m)
    for x in v:
        d[x] += 1
    return d / len(v)


# Analytic PMF must sum to one and match the empirical PMF to within 10%
pmf = np.array([np.exp(logp(k, ALPHA, BETA, DELTA, N)) for k in range(N + 1)])
assert np.allclose(np.sum(pmf), 1)
pmfhat = np.array(makeEmpiricalPmf(obs))
assert np.all(np.abs(pmfhat - pmf) / np.abs(pmf) < .1)
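One more check that is not in the gist: SciPy's `scipy.special.logsumexp` also accepts per-term signs via its `b` argument, so the hand-rolled signed `logsumexp` above can be validated against it. A minimal sketch with made-up inputs:

from scipy.special import logsumexp as scipy_logsumexp

mags = np.array([0.5, 1.5, 0.25])   # made-up log-magnitudes
signs = np.array([1.0, -1.0, 1.0])  # alternating signs, as in logp

# SciPy returns (log|sum(b * exp(a))|, sign of the sum) when return_sign=True
expected, _ = scipy_logsumexp(mags, b=signs, return_sign=True)
assert np.isclose(logsumexp(mags, signs), expected)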