samvaughan/sparse.py

## sparse.py
import numpy as np
import pymc3 as pm
import matplotlib.pyplot as plt

"""
We have a situation where we have something we want to predict (y) and many variables y might depend on (200). We have 100 observations of each variable. We choose that y depends linearly on the data, with slope beta and offset alpha. Only some of the slopes are meaningful (different from 0).Can we recover these values of alpha and beta?

A PyMC3 translation of the Stan code from https://betanalpha.github.io/assets/case_studies/bayes_sparse_regression.html
"""


def wide_priors():
    #What if we try and use a really wide prior here?
    with pm.Model() as uniform_model:

        m=pm.Normal('m', mu=0.0, sd=10.0, shape=M)
        c=pm.Normal('c', mu=0.0, sd=2.0)
        sig=pm.HalfNormal('sig', sd=2.0)


        y=pm.Normal('y', mu=pm.math.dot(X.T, m)+c, sd=sig, observed=Y)

        trace=pm.sample(2000)
    return trace


def Laplace_priors():
    #Laplace priors- put more weight near 0
    with pm.Model() as uniform_model:

        m=pm.Laplace('m', mu=0.0, b=4.0, shape=M)
        c=pm.Normal('c', mu=0.0, sd=2.0)
        sig=pm.HalfNormal('sig', sd=2.0)


        y=pm.Normal('y', mu=pm.math.dot(X.T, m)+c, sd=sig, observed=Y)

        trace=pm.sample(2000)
    return trace

#Finnish horseshoe prior: see https://betanalpha.github.io/assets/case_studies/bayes_sparse_regression.html#3_experiments


def finish_horseshoe():
    m0=10
    slab_scale = 3
    slab_scale_squared=slab_scale*slab_scale
    slab_degrees_of_freedom=25
    half_slab_df=slab_degrees_of_freedom*0.5

    with pm.Model() as finnish_horseshoe_prior:

        tau0 = (m0 / (M - m0)) * (sigma / np.sqrt(1.0 * N))

        beta_tilde = pm.Normal('beta_tilde', mu=0, sd=1, shape=M, testval=0.1)
        lamda = pm.HalfCauchy('lamda', beta=1, shape=M, testval=1.0)
        tau_tilde = pm.HalfCauchy('tau_tilde', beta=1, testval=0.1)
        c2_tilde = pm.InverseGamma('c2_tilde', alpha=half_slab_df, beta=half_slab_df, testval=0.5)


        tau=pm.Deterministic('tau', tau_tilde*tau0)
        c2=pm.Deterministic('c2',slab_scale_squared*c2_tilde)
        lamda_tilde =pm.Deterministic('lamda_tilde', pm.math.sqrt((c2 * pm.math.sqr(lamda) / (c2 + pm.math.sqr(tau) * pm.math.sqr(lamda)) )))

        m = pm.Deterministic('m', tau * lamda_tilde * beta_tilde)
        c=pm.Normal('c', mu=0.0, sd=2.0, testval=1.0)
        mu=pm.Deterministic('mu', pm.math.dot(X.T, m)+alpha)

        sig=pm.Normal('sig', mu=0.0, sd=2.0, testval=1.0)

        y=pm.Normal('y', mu=mu, sd=sig, observed=Y)

        trace=pm.sample(2000, init='advi+adapt_diag', nuts_kwargs={'target_accept':0.99, 'max_treedepth':15})


    return trace


def plot_posterior(trace, title='', fname='Posteriors.png'):

    plt.rc('text', usetex=True)
    plt.rc('font', family='serif')
    font = {'family' : 'normal',
            'weight' : 'bold',
            'size'   : 22}

    plt.rc('font', **font)

    with plt.style.context('default'):
        x=np.arange(M)
        fig, ax=plt.subplots(figsize=(20, 14))
        ax.bar(x, np.median(trace, axis=0), width=1.0, align='center', facecolor='steelblue')
        ax.bar(x, beta, width=1.0, align='center', linewidth=2.0, facecolor='None', edgecolor='k', zorder=10)


        XX=np.array([[i-0.5, i+0.5] for i in range(200)]).ravel()
        m_2_5=np.repeat(np.percentile(trace, 2.5, axis=0), 2)
        m_5=np.repeat(np.percentile(trace, 5, axis=0), 2)
        m_16=np.repeat(np.percentile(trace, 16, axis=0), 2)
        m_84=np.repeat(np.percentile(trace, 84, axis=0), 2)
        m_95=np.repeat(np.percentile(trace, 95, axis=0), 2)
        m_97_5=np.repeat(np.percentile(trace, 97.5, axis=0), 2)

        ax.fill_between(XX, m_2_5, m_97_5, facecolor='lightskyblue')
        ax.fill_between(XX, m_5, m_95, facecolor='skyblue')
        ax.fill_between(XX, m_16, m_84, facecolor='deepskyblue')


        ax.set_xlabel(r'i$^{\mathrm{th}}$ variable', fontsize=22)
        ax.set_ylabel('Posterior', fontsize=22)
        ax.set_title('Gradient Posterior', fontsize=22)
        ax.tick_params(axis='both', labelsize=22)
        ax.set_title(title)

        fig.savefig('{}'.format(name))


    ###Can edit this to plot the residuals as well

    # with plt.style.context('default'):

    #     x=np.arange(M)
    #     fig, ax=plt.subplots(figsize=(20, 14))
    #     ax.bar(x, np.median(trace['m']-beta, axis=0), width=1.0, align='center', facecolor='steelblue')
    #     # ax.bar(x, beta, width=1.0, align='center', linewidth=2.0, facecolor='None', edgecolor='k', zorder=10)
    #     ax.axhline(0.0, c='k', linestyle='dashed')


    #     XX=np.array([[i-0.5, i+0.5] for i in range(200)]).ravel()
    #     m_2_5=np.repeat(np.percentile(trace['m']-beta, 2.5, axis=0), 2)
    #     m_5=np.repeat(np.percentile(trace['m']-beta, 5, axis=0), 2)
    #     m_16=np.repeat(np.percentile(trace['m']-beta, 16, axis=0), 2)
    #     m_84=np.repeat(np.percentile(trace['m']-beta, 84, axis=0), 2)
    #     m_95=np.repeat(np.percentile(trace['m']-beta, 95, axis=0), 2)
    #     m_97_5=np.repeat(np.percentile(trace['m']-beta, 97.5, axis=0), 2)

    #     ax.fill_between(XX, m_2_5, m_97_5, facecolor='lightskyblue')
    #     ax.fill_between(XX, m_5, m_95, facecolor='skyblue')
    #     ax.fill_between(XX, m_16, m_84, facecolor='deepskyblue')

    #     ax.set_xlabel(r'i$^{\mathrm{th}}$ variable', fontsize=22)
    #     ax.set_ylabel(r'Posterior - True Value', fontsize=22)
    #     ax.set_title('Residuals', fontsize=22)
    #     ax.tick_params(axis='both', labelsize=22)

    #     ax.set_ylim(-5.0, 5.0)
    #     fig.savefig('Residuals.png')

    # plt.close('all')


if __name__=='__main__':

    np.random.seed(24)
    M=200 #Number of possible X variables
    N=100 #Number of values per variable
    #Notice we have more covariates than observations!


    alpha=3.0
    sigma=1.0

    prob_slope_is_meaninful=0.05

    #List of coefficients for every possible X variable
    #Most will be near 0, some will be large
    beta=np.empty(M)
    for i in range(M):
        if np.random.binomial(1, 0.05):
            #Now select a positive or negative gradient with p=0.5
            if np.random.binomial(1, 0.5):
                beta[i]=np.random.randn()+10.0
            else:
                beta[i]=np.random.randn()-10.0
        else:
            beta[i]=np.random.randn()*0.25


    #Our inputs, X,  and observed data, y
    X=np.random.randn(M, N)
    Y=np.random.randn(N)*sigma +(X.T @ beta + alpha)

    trace_first_attempt=wide_priors()
    trace_laplace=Laplace_priors()
    trace=finish_horseshoe()


    plot_posterior(trace_first_attempt['m'], title='Normal Priors', fname='normal_prior_posterior.png')
    plot_posterior(trace_laplace['m'], title='Laplace Priors', fname='laplace_prior_posterior.png')
    plot_posterior(trace['m'], title='Finnish Horseshoe', fname='horseshoe_prior_posterior.png')
	import numpy as np
	import pymc3 as pm
	import matplotlib.pyplot as plt

	"""
	We have a situation where we have something we want to predict (y) and many variables y might depend on (200). We have 100 observations of each variable. We choose that y depends linearly on the data, with slope beta and offset alpha. Only some of the slopes are meaningful (different from 0).Can we recover these values of alpha and beta?

	A PyMC3 translation of the Stan code from https://betanalpha.github.io/assets/case_studies/bayes_sparse_regression.html
	"""


	def wide_priors():
	#What if we try and use a really wide prior here?
	with pm.Model() as uniform_model:

	m=pm.Normal('m', mu=0.0, sd=10.0, shape=M)
	c=pm.Normal('c', mu=0.0, sd=2.0)
	sig=pm.HalfNormal('sig', sd=2.0)


	y=pm.Normal('y', mu=pm.math.dot(X.T, m)+c, sd=sig, observed=Y)

	trace=pm.sample(2000)
	return trace


	def Laplace_priors():
	#Laplace priors- put more weight near 0
	with pm.Model() as uniform_model:

	m=pm.Laplace('m', mu=0.0, b=4.0, shape=M)
	c=pm.Normal('c', mu=0.0, sd=2.0)
	sig=pm.HalfNormal('sig', sd=2.0)


	y=pm.Normal('y', mu=pm.math.dot(X.T, m)+c, sd=sig, observed=Y)

	trace=pm.sample(2000)
	return trace

	#Finnish horseshoe prior: see https://betanalpha.github.io/assets/case_studies/bayes_sparse_regression.html#3_experiments


	def finish_horseshoe():
	m0=10
	slab_scale = 3
	slab_scale_squared=slab_scale*slab_scale
	slab_degrees_of_freedom=25
	half_slab_df=slab_degrees_of_freedom*0.5

	with pm.Model() as finnish_horseshoe_prior:

	tau0 = (m0 / (M - m0)) * (sigma / np.sqrt(1.0 * N))

	beta_tilde = pm.Normal('beta_tilde', mu=0, sd=1, shape=M, testval=0.1)
	lamda = pm.HalfCauchy('lamda', beta=1, shape=M, testval=1.0)
	tau_tilde = pm.HalfCauchy('tau_tilde', beta=1, testval=0.1)
	c2_tilde = pm.InverseGamma('c2_tilde', alpha=half_slab_df, beta=half_slab_df, testval=0.5)


	tau=pm.Deterministic('tau', tau_tilde*tau0)
	c2=pm.Deterministic('c2',slab_scale_squared*c2_tilde)
	lamda_tilde =pm.Deterministic('lamda_tilde', pm.math.sqrt((c2 * pm.math.sqr(lamda) / (c2 + pm.math.sqr(tau) * pm.math.sqr(lamda)) )))

	m = pm.Deterministic('m', tau * lamda_tilde * beta_tilde)
	c=pm.Normal('c', mu=0.0, sd=2.0, testval=1.0)
	mu=pm.Deterministic('mu', pm.math.dot(X.T, m)+alpha)

	sig=pm.Normal('sig', mu=0.0, sd=2.0, testval=1.0)

	y=pm.Normal('y', mu=mu, sd=sig, observed=Y)

	trace=pm.sample(2000, init='advi+adapt_diag', nuts_kwargs={'target_accept':0.99, 'max_treedepth':15})


	return trace


	def plot_posterior(trace, title='', fname='Posteriors.png'):

	plt.rc('text', usetex=True)
	plt.rc('font', family='serif')
	font = {'family' : 'normal',
	'weight' : 'bold',
	'size' : 22}

	plt.rc('font', **font)

	with plt.style.context('default'):
	x=np.arange(M)
	fig, ax=plt.subplots(figsize=(20, 14))
	ax.bar(x, np.median(trace, axis=0), width=1.0, align='center', facecolor='steelblue')
	ax.bar(x, beta, width=1.0, align='center', linewidth=2.0, facecolor='None', edgecolor='k', zorder=10)



	XX=np.array([[i-0.5, i+0.5] for i in range(200)]).ravel()
	m_2_5=np.repeat(np.percentile(trace, 2.5, axis=0), 2)
	m_5=np.repeat(np.percentile(trace, 5, axis=0), 2)
	m_16=np.repeat(np.percentile(trace, 16, axis=0), 2)
	m_84=np.repeat(np.percentile(trace, 84, axis=0), 2)
	m_95=np.repeat(np.percentile(trace, 95, axis=0), 2)
	m_97_5=np.repeat(np.percentile(trace, 97.5, axis=0), 2)

	ax.fill_between(XX, m_2_5, m_97_5, facecolor='lightskyblue')
	ax.fill_between(XX, m_5, m_95, facecolor='skyblue')
	ax.fill_between(XX, m_16, m_84, facecolor='deepskyblue')


	ax.set_xlabel(r'i$^{\mathrm{th}}$ variable', fontsize=22)
	ax.set_ylabel('Posterior', fontsize=22)
	ax.set_title('Gradient Posterior', fontsize=22)
	ax.tick_params(axis='both', labelsize=22)
	ax.set_title(title)

	fig.savefig('{}'.format(name))



	###Can edit this to plot the residuals as well

	# with plt.style.context('default'):

	# x=np.arange(M)
	# fig, ax=plt.subplots(figsize=(20, 14))
	# ax.bar(x, np.median(trace['m']-beta, axis=0), width=1.0, align='center', facecolor='steelblue')
	# # ax.bar(x, beta, width=1.0, align='center', linewidth=2.0, facecolor='None', edgecolor='k', zorder=10)
	# ax.axhline(0.0, c='k', linestyle='dashed')



	# XX=np.array([[i-0.5, i+0.5] for i in range(200)]).ravel()
	# m_2_5=np.repeat(np.percentile(trace['m']-beta, 2.5, axis=0), 2)
	# m_5=np.repeat(np.percentile(trace['m']-beta, 5, axis=0), 2)
	# m_16=np.repeat(np.percentile(trace['m']-beta, 16, axis=0), 2)
	# m_84=np.repeat(np.percentile(trace['m']-beta, 84, axis=0), 2)
	# m_95=np.repeat(np.percentile(trace['m']-beta, 95, axis=0), 2)
	# m_97_5=np.repeat(np.percentile(trace['m']-beta, 97.5, axis=0), 2)

	# ax.fill_between(XX, m_2_5, m_97_5, facecolor='lightskyblue')
	# ax.fill_between(XX, m_5, m_95, facecolor='skyblue')
	# ax.fill_between(XX, m_16, m_84, facecolor='deepskyblue')

	# ax.set_xlabel(r'i$^{\mathrm{th}}$ variable', fontsize=22)
	# ax.set_ylabel(r'Posterior - True Value', fontsize=22)
	# ax.set_title('Residuals', fontsize=22)
	# ax.tick_params(axis='both', labelsize=22)

	# ax.set_ylim(-5.0, 5.0)
	# fig.savefig('Residuals.png')

	# plt.close('all')


	if __name__=='__main__':

	np.random.seed(24)
	M=200 #Number of possible X variables
	N=100 #Number of values per variable
	#Notice we have more covariates than observations!


	alpha=3.0
	sigma=1.0

	prob_slope_is_meaninful=0.05

	#List of coefficients for every possible X variable
	#Most will be near 0, some will be large
	beta=np.empty(M)
	for i in range(M):
	if np.random.binomial(1, 0.05):
	#Now select a positive or negative gradient with p=0.5
	if np.random.binomial(1, 0.5):
	beta[i]=np.random.randn()+10.0
	else:
	beta[i]=np.random.randn()-10.0
	else:
	beta[i]=np.random.randn()*0.25


	#Our inputs, X, and observed data, y
	X=np.random.randn(M, N)
	Y=np.random.randn(N)*sigma +(X.T @ beta + alpha)

	trace_first_attempt=wide_priors()
	trace_laplace=Laplace_priors()
	trace=finish_horseshoe()


	plot_posterior(trace_first_attempt['m'], title='Normal Priors', fname='normal_prior_posterior.png')
	plot_posterior(trace_laplace['m'], title='Laplace Priors', fname='laplace_prior_posterior.png')
	plot_posterior(trace['m'], title='Finnish Horseshoe', fname='horseshoe_prior_posterior.png')