Bayes by Backprop in PyTorch (introduced in the paper "Weight Uncertainty in Neural Networks", Blundell et al., 2015)
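For reference, the quantity minimised below is (up to the Monte Carlo and minibatch approximations) the variational free energy of Blundell et al. (2015):

\mathcal{F}(\mathcal{D}, \theta) \approx \frac{1}{n} \sum_{i=1}^{n} \left[ \log q\big(\mathbf{w}^{(i)} \mid \theta\big) - \log P\big(\mathbf{w}^{(i)}\big) - \log P\big(\mathcal{D} \mid \mathbf{w}^{(i)}\big) \right], \qquad \mathbf{w}^{(i)} \sim q(\mathbf{w} \mid \theta),

where each weight sample is drawn with the reparameterisation trick and, per minibatch, the complexity term (the first two summands) is down-weighted by 1/n_batches.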
# Drawn from https://gist.github.com/rocknrollnerd/c5af642cf217971d93f499e8f70fcb72 (in Theano)
# This is implemented in PyTorch
# Author : Anirudh Vemula
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from sklearn import preprocessing
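# Helper log-densities: both return the elementwise log N(x; mu, sigma). The first is
# used for the prior p(w) and the Gaussian likelihood; the second takes log(sigma)
# directly so it can be applied to the learned W_logsigma/b_logsigma parameters.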
def log_gaussian(x, mu, sigma):
    return float(-0.5 * np.log(2 * np.pi) - np.log(np.abs(sigma))) - (x - mu)**2 / (2 * sigma**2)

def log_gaussian_logsigma(x, mu, logsigma):
    return float(-0.5 * np.log(2 * np.pi)) - logsigma - (x - mu)**2 / (2 * torch.exp(logsigma)**2)

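# A fully connected layer whose weights and biases carry a factorised Gaussian
# variational posterior: learned means W_mu/b_mu and learned standard deviations
# obtained as softplus(W_logsigma)/softplus(b_logsigma).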
class MLPLayer(nn.Module):
    def __init__(self, n_input, n_output, sigma_prior):
        super(MLPLayer, self).__init__()
        self.n_input = n_input
        self.n_output = n_output
        self.sigma_prior = sigma_prior
        self.W_mu = nn.Parameter(torch.Tensor(n_input, n_output).normal_(0, 0.01))
        self.W_logsigma = nn.Parameter(torch.Tensor(n_input, n_output).normal_(0, 0.01))
        self.b_mu = nn.Parameter(torch.Tensor(n_output).uniform_(-0.01, 0.01))
        self.b_logsigma = nn.Parameter(torch.Tensor(n_output).uniform_(-0.01, 0.01))
        self.lpw = 0
        self.lqw = 0

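    # Training-mode forward pass: sample W and b via the reparameterisation trick
    # (sigma = softplus(logsigma)) and record the log-prior (lpw) and
    # log-variational-posterior (lqw) of the drawn weights; infer=True skips the
    # sampling and uses the posterior means only.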
    def forward(self, X, infer=False):
        if infer:
            output = torch.mm(X, self.W_mu) + self.b_mu.expand(X.size()[0], self.n_output)
            return output
        epsilon_W, epsilon_b = self.get_random()
        W = self.W_mu + torch.log(1 + torch.exp(self.W_logsigma)) * epsilon_W
        b = self.b_mu + torch.log(1 + torch.exp(self.b_logsigma)) * epsilon_b
        output = torch.mm(X, W) + b.expand(X.size()[0], self.n_output)
        self.lpw = log_gaussian(W, 0, self.sigma_prior).sum() + log_gaussian(b, 0, self.sigma_prior).sum()
        self.lqw = log_gaussian_logsigma(W, self.W_mu, self.W_logsigma).sum() + log_gaussian_logsigma(b, self.b_mu, self.b_logsigma).sum()
        return output

    def get_random(self):
        return Variable(torch.Tensor(self.n_input, self.n_output).normal_(0, self.sigma_prior).cuda()), Variable(torch.Tensor(self.n_output).normal_(0, self.sigma_prior).cuda())

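# Three Bayesian layers (n_input -> 200 -> 200 -> 10) with ReLU activations and a
# softmax output; get_lpw_lqw sums the per-layer log-prior and log-posterior terms
# accumulated during the last stochastic forward pass.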
class MLP(nn.Module):
    def __init__(self, n_input, sigma_prior):
        super(MLP, self).__init__()
        self.l1 = MLPLayer(n_input, 200, sigma_prior)
        self.l1_relu = nn.ReLU()
        self.l2 = MLPLayer(200, 200, sigma_prior)
        self.l2_relu = nn.ReLU()
        self.l3 = MLPLayer(200, 10, sigma_prior)
        self.l3_softmax = nn.Softmax()

    def forward(self, X, infer=False):
        output = self.l1_relu(self.l1(X, infer))
        output = self.l2_relu(self.l2(output, infer))
        output = self.l3_softmax(self.l3(output, infer))
        return output

    def get_lpw_lqw(self):
        lpw = self.l1.lpw + self.l2.lpw + self.l3.lpw
        lqw = self.l1.lqw + self.l2.lqw + self.l3.lqw
        return lpw, lqw

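# Monte Carlo estimate of the three free-energy terms: run the network n_samples times
# (each pass draws a fresh set of weights) and average the log-prior, the log-posterior
# and the log-likelihood. Note that the gist scores the one-hot targets under a Gaussian
# likelihood with std sigma_prior rather than a categorical likelihood.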
def forward_pass_samples(X, y):
    s_log_pw, s_log_qw, s_log_likelihood = 0., 0., 0.
    for _ in xrange(n_samples):
        output = net(X)
        sample_log_pw, sample_log_qw = net.get_lpw_lqw()
        sample_log_likelihood = log_gaussian(y, output, sigma_prior).sum()
        s_log_pw += sample_log_pw
        s_log_qw += sample_log_qw
        s_log_likelihood += sample_log_likelihood
    return s_log_pw / n_samples, s_log_qw / n_samples, s_log_likelihood / n_samples

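# Minibatch variational free energy: the complexity term (l_qw - l_pw) is weighted by
# 1/n_batches (the uniform pi_i scheme from the paper), the likelihood term enters with
# a minus sign, and the result is averaged over the batch.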
def criterion(l_pw, l_qw, l_likelihood):
    return ((1. / n_batches) * (l_qw - l_pw) - l_likelihood).sum() / float(batch_size)

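# Load MNIST, keep a random subset of N examples, scale pixels to [0, 1], and one-hot
# encode the training labels (the test labels stay as integers for the accuracy check).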
mnist = fetch_mldata('MNIST original')
N = 5000
data = np.float32(mnist.data[:]) / 255.
idx = np.random.choice(data.shape[0], N)
data = data[idx]
target = np.int32(mnist.target[idx]).reshape(N, 1)
train_idx, test_idx = train_test_split(np.array(range(N)), test_size=0.05)
train_data, test_data = data[train_idx], data[test_idx]
train_target, test_target = target[train_idx], target[test_idx]
train_target = np.float32(preprocessing.OneHotEncoder(sparse=False).fit_transform(train_target))
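# Model and training hyperparameters: a Gaussian prior with std exp(-3), three weight
# samples per training step, and Adam with a fixed learning rate.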
n_input = train_data.shape[1]
M = train_data.shape[0]
sigma_prior = float(np.exp(-3))
n_samples = 3
learning_rate = 0.001
n_epochs = 100
# Initialize network
net = MLP(n_input, sigma_prior)
net = net.cuda()
# building the objective
# remember, we're evaluating by samples
log_pw, log_qw, log_likelihood = 0., 0., 0.
batch_size = 100
n_batches = M / float(batch_size)
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
n_train_batches = int(train_data.shape[0] / float(batch_size))
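# Training loop: for every minibatch, estimate the free-energy terms by sampling weights,
# backpropagate through the reparameterised draws and take an Adam step; after each epoch,
# report test accuracy using the posterior mean weights (infer=True).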
for e in xrange(n_epochs):
    errs = []
    for b in range(n_train_batches):
        net.zero_grad()
        X = Variable(torch.Tensor(train_data[b * batch_size: (b+1) * batch_size]).cuda())
        y = Variable(torch.Tensor(train_target[b * batch_size: (b+1) * batch_size]).cuda())
        log_pw, log_qw, log_likelihood = forward_pass_samples(X, y)
        loss = criterion(log_pw, log_qw, log_likelihood)
        errs.append(loss.data.cpu().numpy())
        loss.backward()
        optimizer.step()

    X = Variable(torch.Tensor(test_data).cuda(), volatile=True)
    pred = net(X, infer=True)
    _, out = torch.max(pred, 1)
    acc = np.count_nonzero(np.squeeze(out.data.cpu().numpy()) == np.int32(test_target.ravel())) / float(test_data.shape[0])

    print 'epoch', e, 'loss', np.mean(errs), 'acc', acc
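# Not part of the original gist, but a natural way to use the trained posterior: instead
# of the single deterministic pass with infer=True, average several stochastic forward
# passes to get a Monte Carlo estimate of the predictive distribution. A minimal sketch,
# assuming the script above has run (net, test_data, test_target in scope); the name
# n_pred_samples and its value are arbitrary choices.
n_pred_samples = 10  # hypothetical sample count; more samples give a smoother estimate
X_test = Variable(torch.Tensor(test_data).cuda(), volatile=True)
probs = torch.zeros(test_data.shape[0], 10).cuda()
for _ in range(n_pred_samples):
    # each call to net() without infer=True draws a fresh set of weights from q(w | theta)
    probs += net(X_test).data / n_pred_samples
_, mc_out = torch.max(probs, 1)
mc_acc = np.count_nonzero(np.squeeze(mc_out.cpu().numpy()) == np.int32(test_target.ravel())) / float(test_data.shape[0])
print 'MC-averaged test accuracy', mc_acc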