Skip to content

Instantly share code, notes, and snippets.

@adkoo
Created May 10, 2019 17:07
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save adkoo/99d960455258166b247d3310f439e6db to your computer and use it in GitHub Desktop.
ADVI PYMC3
#BNN for Regression
%matplotlib inline
import theano
floatX = theano.config.floatX
import pymc3 as pm
import theano.tensor as T
import sklearn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
from warnings import filterwarnings
filterwarnings('ignore')
sns.set_style('white')
from sklearn import datasets
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
#Toy model
def build_toy_dataset(N=50, noise_std=0.2):
x = np.linspace(-3, 3, num=N)
y = np.cos(x) + np.random.normal(0, noise_std, size=N)
# x = scale(x)
# y = scale(y)
x = x.astype(floatX).reshape((N, 1))
y = y.astype(floatX)
return x, y
N = 50 # number of data points
D = 1 # number of features
X_train, Y_train = build_toy_dataset(N)
X_test, Y_test = build_toy_dataset(N)
fig, ax = plt.subplots()
ax.plot(X_test,Y_test,'ro',X_train,Y_train,'bx',alpha=0.2)
ax.legend(['Y_test','Y_train'])
ax.set(xlabel='X', ylabel='Y', title='Toy Regression data set');
#2 layers with 5 nodes each
def construct_nn_2Layers_with_b(ann_input, ann_output):
n_hidden = 5
n_features = ann_input.get_value().shape[1]
# Initialize random weights between each layer
init_1 = np.random.randn(n_features, n_hidden).astype(floatX)
init_2 = np.random.randn(n_hidden, n_hidden).astype(floatX)
init_out = np.random.randn(n_hidden).astype(floatX)
# Initialize random biases in each layer
init_b_1 = np.random.randn(n_hidden).astype(floatX)
init_b_2 = np.random.randn(n_hidden).astype(floatX)
init_b_out = np.random.randn(1).astype(floatX)
with pm.Model() as neural_network:
# Weights from input to hidden layer
weights_in_1 = pm.Normal('w_in_1', 0, sd=1,
shape=(n_features, n_hidden),
testval=init_1)
bias_1 = pm.Normal('b_1', mu=0, sd=1, shape=(n_hidden), testval=init_b_1)
# Weights from 1st to 2nd layer
weights_1_2 = pm.Normal('w_1_2', 0, sd=1,
shape=(n_hidden, n_hidden),
testval=init_2)
bias_2 = pm.Normal('b_2', mu=0, sd=1, shape=(n_hidden), testval=init_b_2)
# Weights from hidden layer to output
weights_2_out = pm.Normal('w_2_out', 0, sd=1,
shape=(n_hidden,),
testval=init_out)
bias_out = pm.Normal('b_out', mu=0, sd=1, shape=(1), testval=init_b_out)
# Build neural-network using tanh activation function
act_1 = pm.math.tanh(pm.math.dot(ann_input,
weights_in_1)+bias_1)
act_2 = pm.math.tanh(pm.math.dot(act_1,
weights_1_2)+bias_2)
act_out = (pm.math.dot(act_2, weights_2_out) + bias_out)
sd = pm.HalfNormal('sd', sd=1)
out = pm.Normal('out', mu=act_out, sd=sd, observed=ann_output, total_size=Y_train.shape[0])
return neural_network
ann_input = theano.shared(X_train)
ann_output = theano.shared(Y_train)
neural_network = construct_nn_2Layers_with_b(ann_input, ann_output)
from pymc3.theanof import set_tt_rng, MRG_RandomStreams
set_tt_rng(MRG_RandomStreams(42))
with neural_network:
s = theano.shared(pm.floatX(1))
inference = pm.ADVI(cost_part_grad_scale=s)
tracker = pm.callbacks.Tracker(
mean=inference.approx.mean.eval, # callable that returns mean
std=inference.approx.std.eval # callable that returns std
)
pm.fit(n=20000, method=inference, callbacks=[tracker])
# It is time to set `s` to zero
s.set_value(0)
approx = pm.fit(n=30000, method=inference, callbacks=[tracker] )
# plot the evidence lower bound
ymin=-200; ymax=-40
plt.figure(figsize=(12, 3))
plt.plot(-inference.hist, label='new ADVI')
plt.legend()
plt.ylabel('ELBO')
plt.xlabel('iteration');
plt.ylim(ymin,ymax)
trace = approx.sample(draws=5000)
pm.traceplot(trace);
ann_input.set_value(X_test)
ann_output.set_value(Y_test)
with neural_network:
ppc = pm.sample_posterior_predictive(trace, samples=500, progressbar=False)
pred = ppc['out'].mean(axis=0)
fig, ax = plt.subplots()
ax.plot(X_test,Y_test,'bx',alpha=0.5,label='Observed')
ax.plot(X_test,pred,'r--',alpha=0.9,label='Posterior predictive means')
ax.legend()
ax.set(xlabel='X', ylabel='Y', title='Test set');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment