Created
August 30, 2018 17:01
-
-
Save samvaughan/4773100003e7a7da199b6574f044ad41 to your computer and use it in GitHub Desktop.
PyMC3 code to perform Sparse Bayes Regression (a translation of the Stan Code from https://betanalpha.github.io/assets/case_studies/bayes_sparse_regression.html)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pymc3 as pm | |
import matplotlib.pyplot as plt | |
""" | |
We have a situation where we have something we want to predict (y) and many variables y might depend on (200). We have 100 observations of each variable. We choose that y depends linearly on the data, with slope beta and offset alpha. Only some of the slopes are meaningful (different from 0).Can we recover these values of alpha and beta? | |
A PyMC3 translation of the Stan code from https://betanalpha.github.io/assets/case_studies/bayes_sparse_regression.html | |
""" | |
def wide_priors(): | |
#What if we try and use a really wide prior here? | |
with pm.Model() as uniform_model: | |
m=pm.Normal('m', mu=0.0, sd=10.0, shape=M) | |
c=pm.Normal('c', mu=0.0, sd=2.0) | |
sig=pm.HalfNormal('sig', sd=2.0) | |
y=pm.Normal('y', mu=pm.math.dot(X.T, m)+c, sd=sig, observed=Y) | |
trace=pm.sample(2000) | |
return trace | |
def Laplace_priors(): | |
#Laplace priors- put more weight near 0 | |
with pm.Model() as uniform_model: | |
m=pm.Laplace('m', mu=0.0, b=4.0, shape=M) | |
c=pm.Normal('c', mu=0.0, sd=2.0) | |
sig=pm.HalfNormal('sig', sd=2.0) | |
y=pm.Normal('y', mu=pm.math.dot(X.T, m)+c, sd=sig, observed=Y) | |
trace=pm.sample(2000) | |
return trace | |
#Finnish horseshoe prior: see https://betanalpha.github.io/assets/case_studies/bayes_sparse_regression.html#3_experiments | |
def finish_horseshoe(): | |
m0=10 | |
slab_scale = 3 | |
slab_scale_squared=slab_scale*slab_scale | |
slab_degrees_of_freedom=25 | |
half_slab_df=slab_degrees_of_freedom*0.5 | |
with pm.Model() as finnish_horseshoe_prior: | |
tau0 = (m0 / (M - m0)) * (sigma / np.sqrt(1.0 * N)) | |
beta_tilde = pm.Normal('beta_tilde', mu=0, sd=1, shape=M, testval=0.1) | |
lamda = pm.HalfCauchy('lamda', beta=1, shape=M, testval=1.0) | |
tau_tilde = pm.HalfCauchy('tau_tilde', beta=1, testval=0.1) | |
c2_tilde = pm.InverseGamma('c2_tilde', alpha=half_slab_df, beta=half_slab_df, testval=0.5) | |
tau=pm.Deterministic('tau', tau_tilde*tau0) | |
c2=pm.Deterministic('c2',slab_scale_squared*c2_tilde) | |
lamda_tilde =pm.Deterministic('lamda_tilde', pm.math.sqrt((c2 * pm.math.sqr(lamda) / (c2 + pm.math.sqr(tau) * pm.math.sqr(lamda)) ))) | |
m = pm.Deterministic('m', tau * lamda_tilde * beta_tilde) | |
c=pm.Normal('c', mu=0.0, sd=2.0, testval=1.0) | |
mu=pm.Deterministic('mu', pm.math.dot(X.T, m)+alpha) | |
sig=pm.Normal('sig', mu=0.0, sd=2.0, testval=1.0) | |
y=pm.Normal('y', mu=mu, sd=sig, observed=Y) | |
trace=pm.sample(2000, init='advi+adapt_diag', nuts_kwargs={'target_accept':0.99, 'max_treedepth':15}) | |
return trace | |
def plot_posterior(trace, title='', fname='Posteriors.png'): | |
plt.rc('text', usetex=True) | |
plt.rc('font', family='serif') | |
font = {'family' : 'normal', | |
'weight' : 'bold', | |
'size' : 22} | |
plt.rc('font', **font) | |
with plt.style.context('default'): | |
x=np.arange(M) | |
fig, ax=plt.subplots(figsize=(20, 14)) | |
ax.bar(x, np.median(trace, axis=0), width=1.0, align='center', facecolor='steelblue') | |
ax.bar(x, beta, width=1.0, align='center', linewidth=2.0, facecolor='None', edgecolor='k', zorder=10) | |
XX=np.array([[i-0.5, i+0.5] for i in range(200)]).ravel() | |
m_2_5=np.repeat(np.percentile(trace, 2.5, axis=0), 2) | |
m_5=np.repeat(np.percentile(trace, 5, axis=0), 2) | |
m_16=np.repeat(np.percentile(trace, 16, axis=0), 2) | |
m_84=np.repeat(np.percentile(trace, 84, axis=0), 2) | |
m_95=np.repeat(np.percentile(trace, 95, axis=0), 2) | |
m_97_5=np.repeat(np.percentile(trace, 97.5, axis=0), 2) | |
ax.fill_between(XX, m_2_5, m_97_5, facecolor='lightskyblue') | |
ax.fill_between(XX, m_5, m_95, facecolor='skyblue') | |
ax.fill_between(XX, m_16, m_84, facecolor='deepskyblue') | |
ax.set_xlabel(r'i$^{\mathrm{th}}$ variable', fontsize=22) | |
ax.set_ylabel('Posterior', fontsize=22) | |
ax.set_title('Gradient Posterior', fontsize=22) | |
ax.tick_params(axis='both', labelsize=22) | |
ax.set_title(title) | |
fig.savefig('{}'.format(name)) | |
###Can edit this to plot the residuals as well | |
# with plt.style.context('default'): | |
# x=np.arange(M) | |
# fig, ax=plt.subplots(figsize=(20, 14)) | |
# ax.bar(x, np.median(trace['m']-beta, axis=0), width=1.0, align='center', facecolor='steelblue') | |
# # ax.bar(x, beta, width=1.0, align='center', linewidth=2.0, facecolor='None', edgecolor='k', zorder=10) | |
# ax.axhline(0.0, c='k', linestyle='dashed') | |
# XX=np.array([[i-0.5, i+0.5] for i in range(200)]).ravel() | |
# m_2_5=np.repeat(np.percentile(trace['m']-beta, 2.5, axis=0), 2) | |
# m_5=np.repeat(np.percentile(trace['m']-beta, 5, axis=0), 2) | |
# m_16=np.repeat(np.percentile(trace['m']-beta, 16, axis=0), 2) | |
# m_84=np.repeat(np.percentile(trace['m']-beta, 84, axis=0), 2) | |
# m_95=np.repeat(np.percentile(trace['m']-beta, 95, axis=0), 2) | |
# m_97_5=np.repeat(np.percentile(trace['m']-beta, 97.5, axis=0), 2) | |
# ax.fill_between(XX, m_2_5, m_97_5, facecolor='lightskyblue') | |
# ax.fill_between(XX, m_5, m_95, facecolor='skyblue') | |
# ax.fill_between(XX, m_16, m_84, facecolor='deepskyblue') | |
# ax.set_xlabel(r'i$^{\mathrm{th}}$ variable', fontsize=22) | |
# ax.set_ylabel(r'Posterior - True Value', fontsize=22) | |
# ax.set_title('Residuals', fontsize=22) | |
# ax.tick_params(axis='both', labelsize=22) | |
# ax.set_ylim(-5.0, 5.0) | |
# fig.savefig('Residuals.png') | |
# plt.close('all') | |
if __name__=='__main__': | |
np.random.seed(24) | |
M=200 #Number of possible X variables | |
N=100 #Number of values per variable | |
#Notice we have more covariates than observations! | |
alpha=3.0 | |
sigma=1.0 | |
prob_slope_is_meaninful=0.05 | |
#List of coefficients for every possible X variable | |
#Most will be near 0, some will be large | |
beta=np.empty(M) | |
for i in range(M): | |
if np.random.binomial(1, 0.05): | |
#Now select a positive or negative gradient with p=0.5 | |
if np.random.binomial(1, 0.5): | |
beta[i]=np.random.randn()+10.0 | |
else: | |
beta[i]=np.random.randn()-10.0 | |
else: | |
beta[i]=np.random.randn()*0.25 | |
#Our inputs, X, and observed data, y | |
X=np.random.randn(M, N) | |
Y=np.random.randn(N)*sigma +(X.T @ beta + alpha) | |
trace_first_attempt=wide_priors() | |
trace_laplace=Laplace_priors() | |
trace=finish_horseshoe() | |
plot_posterior(trace_first_attempt['m'], title='Normal Priors', fname='normal_prior_posterior.png') | |
plot_posterior(trace_laplace['m'], title='Laplace Priors', fname='laplace_prior_posterior.png') | |
plot_posterior(trace['m'], title='Finnish Horseshoe', fname='horseshoe_prior_posterior.png') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment