Last active
March 11, 2017 16:28
-
-
Save cs224/36482b4f52885310cec6ef975fd20184 to your computer and use it in GitHub Desktop.
Experiments in implementing a PyMC3 mixture model with two shifted Gamma stributions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns | |
import scipy.stats as stats | |
from theano import tensor as tt | |
import pymc3 as pm | |
from pymc3.distributions.dist_math import bound, logpow, gammaln | |
from pymc3.distributions.distribution import draw_values, generate_samples | |
SEED = 5132290 # from random.org | |
np.random.seed(SEED) | |
D = 200 # granularity of plots along the x-axis | |
PLOT = False | |
# PLOT = True | |
# Example starting code in this cell | |
n1 = 500 | |
n2 = 200 | |
n = n1+n2 | |
shift1 = 20 | |
shift2 = 40 | |
# https://docs.scipy.org/doc/numpy/reference/generated/numpy.random.gamma.html | |
# https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.stats.gamma.html | |
# https://en.wikipedia.org/wiki/Gamma_distribution | |
k = 2. | |
theta = 4. | |
np.random.gamma(k, theta) | |
data1 = np.random.gamma(k, theta, n1) + shift1 | |
# data1 = stats.gamma.rvs(k, loc=shift1, scale=theta, size=n1) # x = stats.gamma.rvs(2.5, loc=0, scale=4, size=1000) # A test sample. | |
# http://stackoverflow.com/questions/18703262/using-python-scipy-to-fit-gamma-distribution-to-data | |
stats.rv_continuous.fit(stats.gamma, data1, floc=shift1) | |
data2 = np.random.gamma(k, theta,n2) + shift2 | |
data = np.concatenate([data1 , data2]) | |
# np.max(data) | |
x_plot = np.linspace(data.min(), data.max(), D) | |
y_gamma_sum = (n1/n)*stats.gamma.pdf(x_plot, k, loc=shift1, scale=theta) + (n2/n)*stats.gamma.pdf(x_plot, k, loc=shift2, scale=theta) | |
if PLOT: | |
fig, ax = plt.subplots(figsize=(8, 6)) | |
n_, bins, patches = ax.hist(data, histtype='stepfilled', bins=100, alpha=0.85, label="k: {} and \\theta: {}".format(k, theta), color="#A60628", normed=True)#, weights=weights, normed=True | |
ax.plot(x_plot, y_gamma_sum, c='gray', alpha=0.9, label='True y_norm_sum'); | |
ax.legend(loc="upper left") | |
ax.set_title("sum of two gaussians") | |
ax.set_xlim(0,100) | |
ax.set_ylim(0.0, 0.1) | |
ax.set_xlabel("x") | |
class ShiftedLog(pm.distributions.continuous.transforms.ElemwiseTransform): | |
name = "log" | |
def __init__(self, c=0.0): | |
self.c = c | |
def backward(self, x): | |
return tt.exp(x + self.c) | |
def forward(self, x): | |
return tt.log(x - self.c) | |
class ShiftedGamma(pm.distributions.continuous.Continuous): | |
def __init__(self, alpha=None, beta=None, mu=None, sd=None, c=0.0, *args, **kwargs): | |
super(ShiftedGamma, self).__init__(transform=ShiftedLog(c), *args, **kwargs) | |
alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) | |
self.alpha = alpha | |
self.beta = beta | |
self.mean = alpha / beta + c | |
self.mode = tt.maximum((alpha - 1) / beta, 0) + c | |
self.variance = alpha / beta**2 | |
self.c = c | |
# self.testval = kwargs.pop('testval', None) | |
# if not self.testval: | |
# self.testval = c + 10.0 | |
pm.distributions.continuous.assert_negative_support(alpha, 'alpha', 'Gamma') | |
pm.distributions.continuous.assert_negative_support(beta, 'beta', 'Gamma') | |
def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): | |
if (alpha is not None) and (beta is not None): | |
pass | |
elif (mu is not None) and (sd is not None): | |
alpha = mu**2 / sd**2 | |
beta = mu / sd**2 | |
else: | |
raise ValueError('Incompatible parameterization. Either use ' | |
'alpha and beta, or mu and sd to specify ' | |
'distribution.') | |
return alpha, beta | |
def random(self, point=None, size=None, repeat=None): | |
alpha, beta, c = draw_values([self.alpha, self.beta, self.c], point=point) | |
return generate_samples(stats.gamma.rvs, alpha, scale=1. / beta, dist_shape=self.shape, size=size, loc=c) | |
def logp(self, value): | |
alpha = self.alpha | |
beta = self.beta | |
c = self.c | |
x = value - c | |
return bound(-gammaln(alpha) + logpow(beta, alpha) - beta * x + logpow(x, alpha - 1), x >= 0, alpha > 0, beta > 0) | |
# with pm.Model() as free: | |
# gamma = pm.Gamma('gamma', 2.0, 4.0) | |
# shifted_gamma = ShiftedGamma('shifted_gamma', 2.0, 4.0, c=100.0, testval=150.0) | |
# | |
# with free: | |
# start = pm.find_MAP() | |
# print("start", [(k, start[k]) for k in start.keys()]) | |
# | |
# | |
# with free: | |
# step = pm.Metropolis() | |
# free_trace = pm.sample(10000, step=step) | |
# | |
# t = free_trace[2000::10] | |
# pm.traceplot(t) | |
# with pm.Model() as single: | |
# alpha = pm.Uniform('alpha', 0, 10) # == k | |
# beta = pm.Uniform('beta' , 0, 10) # == theta | |
# shift = pm.Uniform('shift' , 18, 22) | |
# shifted_gamma = ShiftedGamma('shifted_gamma', alpha, beta, c=shift, observed=data1) # testval=20.0, | |
# | |
# with single: | |
# start = pm.find_MAP() | |
# print("start", [(k, start[k]) for k in start.keys()]) | |
# | |
# with single: | |
# step = pm.Metropolis() | |
# single_trace = pm.sample(10000, step=step) | |
# | |
# t = single_trace[2000::10] | |
# pm.traceplot(t) | |
# pm.summary(t) | |
# with pm.Model() as model1: | |
# p = pm.Beta( "p", 1., 1., testval=0.5) #this is the fraction that come from mean1 vs mean2 | |
# | |
# alpha = pm.Uniform('alpha', 0, 10) # == k | |
# beta = pm.Uniform('beta' , 0, 10) # == theta | |
# # shift1_ = pm.Uniform('shift1' , 18, 22) # , testval=21.0 | |
# # shift1 = pm.Normal('shift1', mu=20, sd=50) | |
# # shift2_ = pm.Uniform('shift2' , 38, 42) # , testval=41.0 | |
# # shift2 = pm.Normal('shift2', mu=20, sd=50) | |
# | |
# shifts = pm.Normal('shifts', mu=20, sd=50, shape=2) | |
# | |
# gamma1 = ShiftedGamma.dist(alpha, beta, c=shifts[0]) | |
# gamma2 = ShiftedGamma.dist(alpha, beta, c=shifts[1]) | |
# process = pm.Mixture('obs', tt.stack([p, 1-p]), [gamma1, gamma2], observed=data) | |
# | |
# with model1: | |
# start = pm.find_MAP() | |
# print("start", [(k, start[k]) for k in start.keys()]) | |
# | |
# # shift1.tag.test_value | |
# # shift1_.tag.test_value | |
# | |
# with model1: | |
# step = pm.Metropolis() | |
# trace1 = pm.sample(10000, step=step) | |
# | |
# t = trace1[2000::10] | |
# pm.traceplot(t) | |
# | |
with pm.Model() as model: | |
p = pm.Beta( "p", 1., 1., testval=0.5) #this is the fraction that come from mean1 vs mean2 | |
# min_p_potential = pm.Potential('min_p_potential', tt.switch(p > 0.1, -np.inf, 0) + tt.switch((1-p) > 0.1, -np.inf, 0)) | |
alpha = pm.Uniform('alpha', 0, 10) # == k | |
beta = pm.Uniform('beta' , 0, 1) # == theta | |
# alpha = pm.Gamma('alpha', mu=1.0, sd=10) # == k | |
# beta = pm.Gamma('beta' , mu=1.0, sd=1) # == theta | |
# alpha = pm.Exponential('alpha', 1.0) # == k | |
# beta = pm.Exponential('beta' , 1.0) # == theta | |
shifts = pm.Normal('shifts', mu=20, sd=50, shape=2) | |
# order_shifts_potential = pm.Potential('order_shifts_potential', tt.switch(shifts[0] - shifts[1] < 0, -np.inf, 0)) | |
# shifts = pm.Beta('shifts', 2.0, 2.0, shape=2) | |
# gamma = ShiftedGamma.dist(alpha, beta, c=(shifts*0.5)) | |
gamma = ShiftedGamma.dist(alpha, beta, c=shifts) | |
# process = pm.Mixture('obs', tt.stack([p, 1-p]), pm.Gamma.dist(alpha, beta), observed=data) | |
process = pm.Mixture('obs', tt.stack([p, 1-p]), gamma, observed=data) | |
# with model: | |
# # start, sds, elbo = pm.variational.advi(n=100000) | |
# start = pm.find_MAP() | |
# print("start", [(k, start[k]) for k in start.keys()]) | |
with model: | |
# step = pm.NUTS() | |
step = pm.Metropolis() | |
trace = pm.sample(10000, step=step) #, start=start | |
t = trace[2000::10] | |
pm.traceplot(t) | |
pm.summary(t) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://mc-stan.org/documentation/ | |
# https://modernstatisticalworkflow.blogspot.de/2016/10/finite-mixture-models-in-stan.html | |
# https://modernstatisticalworkflow.blogspot.de/2016/10/finite-mixture-model-with-time-varying.html | |
# https://modernstatisticalworkflow.blogspot.de/2016/11/a-bag-of-tips-and-tricks-for-dealing.html | |
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns | |
import scipy.stats as stats | |
import pystan | |
SEED = 5132290 # from random.org | |
np.random.seed(SEED) | |
D = 200 # granularity of plots along the x-axis | |
PLOT = False | |
# PLOT = True | |
# Example starting code in this cell | |
n1 = 500 | |
n2 = 200 | |
n = n1+n2 | |
shift1 = 20 | |
shift2 = 40 | |
# https://docs.scipy.org/doc/numpy/reference/generated/numpy.random.gamma.html | |
# https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.stats.gamma.html | |
# https://en.wikipedia.org/wiki/Gamma_distribution | |
k = 2. | |
theta = 4. | |
np.random.gamma(k, theta) | |
data1 = np.random.gamma(k, theta, n1) + shift1 | |
# data1 = stats.gamma.rvs(k, loc=shift1, scale=theta, size=n1) # x = stats.gamma.rvs(2.5, loc=0, scale=4, size=1000) # A test sample. | |
# http://stackoverflow.com/questions/18703262/using-python-scipy-to-fit-gamma-distribution-to-data | |
stats.rv_continuous.fit(stats.gamma, data1, floc=shift1) | |
data2 = np.random.gamma(k, theta,n2) + shift2 | |
data = np.concatenate([data1 , data2]) | |
# np.max(data) | |
x_plot = np.linspace(data.min(), data.max(), D) | |
y_gamma_sum = (n1/n)*stats.gamma.pdf(x_plot, k, loc=shift1, scale=theta) + (n2/n)*stats.gamma.pdf(x_plot, k, loc=shift2, scale=theta) | |
if PLOT: | |
fig, ax = plt.subplots(figsize=(8, 6)) | |
n_, bins, patches = ax.hist(data, histtype='stepfilled', bins=100, alpha=0.85, label="k: {} and \\theta: {}".format(k, theta), color="#A60628", normed=True)#, weights=weights, normed=True | |
ax.plot(x_plot, y_gamma_sum, c='gray', alpha=0.9, label='True y_norm_sum'); | |
ax.legend(loc="upper left") | |
ax.set_title("sum of two gaussians") | |
ax.set_xlim(0,100) | |
ax.set_ylim(0.0, 0.1) | |
ax.set_xlabel("x") | |
model_string = """ | |
data { | |
int<lower = 0> N; // number of observations | |
real y[N]; // y is a length N vector of integers | |
} | |
parameters { | |
real<lower=0, upper=1> p; | |
real<lower=0, upper=10> a; // a ~ uniform(0,10) | |
real<lower=0, upper=10> b; // b ~ uniform(0,10) | |
ordered[2] shifts; | |
} | |
model { | |
vector[2] contributions; | |
p ~ beta(2,2); | |
shifts ~ normal(30,50); | |
for(i in 1:N) { | |
contributions[1] = log(p) + gamma_lpdf(y[i] - shifts[1] | a, b); | |
contributions[2] = log(1-p) + gamma_lpdf(y[i] - shifts[2] | a, b); | |
target += log_sum_exp(contributions); | |
} | |
} | |
""" | |
sm = pystan.StanModel(model_code=model_string) | |
def initfun(): | |
return dict(p=0.7, a=2.0, b=0.2, shifts=np.array([20.0, 40.0])) | |
data_list = {'y': data, 'N': n} | |
# fit = sm.sampling(data=data_list, iter=1000, warmup=200, chains=2, thin=1, seed=SEED, init=initfun, algorithm='HMC') | |
fit = sm.sampling(data=data_list, iter=600, warmup=100, chains=2, thin=1, seed=SEED, init=initfun, algorithm='NUTS') | |
# fit = sm.sampling(data=data_list, iter=600, warmup=100, chains=2, thin=1, seed=SEED, init='random', algorithm='NUTS') | |
print(fit) | |
fit.traceplot() | |
# fit.plot(['theta']) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment