Created
May 10, 2019 17:07
Star
You must be signed in to star a gist
ADVI PYMC3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#BNN for Regression | |
%matplotlib inline | |
import theano | |
floatX = theano.config.floatX | |
import pymc3 as pm | |
import theano.tensor as T | |
import sklearn | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import time | |
from warnings import filterwarnings | |
filterwarnings('ignore') | |
sns.set_style('white') | |
from sklearn import datasets | |
from sklearn.preprocessing import scale | |
from sklearn.model_selection import train_test_split | |
#Toy model | |
def build_toy_dataset(N=50, noise_std=0.2): | |
x = np.linspace(-3, 3, num=N) | |
y = np.cos(x) + np.random.normal(0, noise_std, size=N) | |
# x = scale(x) | |
# y = scale(y) | |
x = x.astype(floatX).reshape((N, 1)) | |
y = y.astype(floatX) | |
return x, y | |
N = 50 # number of data points | |
D = 1 # number of features | |
X_train, Y_train = build_toy_dataset(N) | |
X_test, Y_test = build_toy_dataset(N) | |
fig, ax = plt.subplots() | |
ax.plot(X_test,Y_test,'ro',X_train,Y_train,'bx',alpha=0.2) | |
ax.legend(['Y_test','Y_train']) | |
ax.set(xlabel='X', ylabel='Y', title='Toy Regression data set'); | |
#2 layers with 5 nodes each | |
def construct_nn_2Layers_with_b(ann_input, ann_output): | |
n_hidden = 5 | |
n_features = ann_input.get_value().shape[1] | |
# Initialize random weights between each layer | |
init_1 = np.random.randn(n_features, n_hidden).astype(floatX) | |
init_2 = np.random.randn(n_hidden, n_hidden).astype(floatX) | |
init_out = np.random.randn(n_hidden).astype(floatX) | |
# Initialize random biases in each layer | |
init_b_1 = np.random.randn(n_hidden).astype(floatX) | |
init_b_2 = np.random.randn(n_hidden).astype(floatX) | |
init_b_out = np.random.randn(1).astype(floatX) | |
with pm.Model() as neural_network: | |
# Weights from input to hidden layer | |
weights_in_1 = pm.Normal('w_in_1', 0, sd=1, | |
shape=(n_features, n_hidden), | |
testval=init_1) | |
bias_1 = pm.Normal('b_1', mu=0, sd=1, shape=(n_hidden), testval=init_b_1) | |
# Weights from 1st to 2nd layer | |
weights_1_2 = pm.Normal('w_1_2', 0, sd=1, | |
shape=(n_hidden, n_hidden), | |
testval=init_2) | |
bias_2 = pm.Normal('b_2', mu=0, sd=1, shape=(n_hidden), testval=init_b_2) | |
# Weights from hidden layer to output | |
weights_2_out = pm.Normal('w_2_out', 0, sd=1, | |
shape=(n_hidden,), | |
testval=init_out) | |
bias_out = pm.Normal('b_out', mu=0, sd=1, shape=(1), testval=init_b_out) | |
# Build neural-network using tanh activation function | |
act_1 = pm.math.tanh(pm.math.dot(ann_input, | |
weights_in_1)+bias_1) | |
act_2 = pm.math.tanh(pm.math.dot(act_1, | |
weights_1_2)+bias_2) | |
act_out = (pm.math.dot(act_2, weights_2_out) + bias_out) | |
sd = pm.HalfNormal('sd', sd=1) | |
out = pm.Normal('out', mu=act_out, sd=sd, observed=ann_output, total_size=Y_train.shape[0]) | |
return neural_network | |
ann_input = theano.shared(X_train) | |
ann_output = theano.shared(Y_train) | |
neural_network = construct_nn_2Layers_with_b(ann_input, ann_output) | |
from pymc3.theanof import set_tt_rng, MRG_RandomStreams | |
set_tt_rng(MRG_RandomStreams(42)) | |
with neural_network: | |
s = theano.shared(pm.floatX(1)) | |
inference = pm.ADVI(cost_part_grad_scale=s) | |
tracker = pm.callbacks.Tracker( | |
mean=inference.approx.mean.eval, # callable that returns mean | |
std=inference.approx.std.eval # callable that returns std | |
) | |
pm.fit(n=20000, method=inference, callbacks=[tracker]) | |
# It is time to set `s` to zero | |
s.set_value(0) | |
approx = pm.fit(n=30000, method=inference, callbacks=[tracker] ) | |
# plot the evidence lower bound | |
ymin=-200; ymax=-40 | |
plt.figure(figsize=(12, 3)) | |
plt.plot(-inference.hist, label='new ADVI') | |
plt.legend() | |
plt.ylabel('ELBO') | |
plt.xlabel('iteration'); | |
plt.ylim(ymin,ymax) | |
trace = approx.sample(draws=5000) | |
pm.traceplot(trace); | |
ann_input.set_value(X_test) | |
ann_output.set_value(Y_test) | |
with neural_network: | |
ppc = pm.sample_posterior_predictive(trace, samples=500, progressbar=False) | |
pred = ppc['out'].mean(axis=0) | |
fig, ax = plt.subplots() | |
ax.plot(X_test,Y_test,'bx',alpha=0.5,label='Observed') | |
ax.plot(X_test,pred,'r--',alpha=0.9,label='Posterior predictive means') | |
ax.legend() | |
ax.set(xlabel='X', ylabel='Y', title='Test set'); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment