@roblem
Last active March 12, 2020 15:13
Some TensorFlow Probability benchmarks
import sys
print("Running in :", sys.executable)
import tensorflow as tf
devs = tf.config.list_physical_devices()
devs_l = [devs[i][-1] for i in range(len(devs))]
print("TF devices: ", devs)
if any("GPU" in s for s in devs_l):
    GPU_avail = True
else:
    GPU_avail = False
print("Is GPU Available? ", GPU_avail)
import tensorflow_probability as tfp
import numpy as np
import pandas as pd
import time as time
# set tensorflow data type
dtype = tf.float32
##
## Dictates number of samples
##
nuts_burnin = 500
nuts_samples = 1000
##
## simple OLS Data Generation Process
##
# True beta
N = 50000
K = 500
b = np.random.randn(K)
b[0] = b[0] + 3
# True error std deviation
sigma_e = 1
x = np.c_[np.ones(N), np.random.randn(N,K-1)]
y = x.dot(b) + sigma_e * np.random.randn(N)
# estimate parameter vector, errors, sd of errors, and se of parameters
bols = np.linalg.inv(x.T.dot(x)).dot(x.T.dot(y))
err = y - x.dot(bols)
sigma_ols = np.sqrt(err.dot(err)/(x.shape[0] - x.shape[1]))
se = np.sqrt(err.dot(err)/(x.shape[0] - x.shape[1]) * np.diagonal(np.linalg.inv(x.T.dot(x))))
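# (Optional sanity check, not part of the original gist.) np.linalg.lstsq solves
# the same least-squares problem via an SVD instead of explicitly inverting X'X,
# which is numerically more stable; the two estimates should agree closely.
bols_lstsq = np.linalg.lstsq(x, y, rcond=None)[0]
assert np.allclose(bols, bols_lstsq), "normal-equation and lstsq estimates disagree"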
# put results together for easy viewing
ols_parms = np.r_[bols, sigma_ols]
ols_se = np.r_[se, np.nan]
print("\n")
indexn = ['b'+str(i) for i in range(K)]
indexn.extend(['sigma'])
print(pd.DataFrame(np.c_[ols_parms, ols_se], columns=['estimate', 'std err'],
                   index=indexn))
print("\n")
X = tf.constant(x, dtype=dtype)
Y = tf.constant(y, dtype=dtype)
N_ = tf.constant(N, dtype=dtype)
pi = tf.constant(np.pi, dtype=dtype)
init_step_size = .1
# initialize
init = [tf.constant(np.random.randn(K), dtype=dtype), tf.constant(1., dtype=dtype)]
##
## Model Log-Likelihood/Posterior
##
@tf.function
def ols_loglike(beta, sigma):
    # xb (mu_i for each observation)
    mu = tf.linalg.matvec(X, beta)
    # the normal pdf, logged and summed over all observations
    ll = - (N_/2.)*tf.math.log(2.*pi*sigma**2) - (1./(2.*sigma**2.))*tf.math.reduce_sum((Y-mu)**2., axis=-1)
    return ll
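# In math terms, the function above returns the Gaussian log-likelihood
#   log L(beta, sigma) = -(N/2) log(2*pi*sigma^2) - (1/(2*sigma^2)) * sum_i (y_i - x_i'beta)^2.
# No prior terms are added, so the "posterior" targeted by the samplers below is
# proportional to this likelihood (i.e. an implicit flat prior on beta and sigma).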
#
# Evaluate speed of function evals (no tfp required)
#
with tf.device('/CPU:0'):
    startt = time.time()
    grad_obj = tfp.math.value_and_gradient(ols_loglike, [init[0], init[1]])
    endt = time.time()
    print("\nLogL calculation in %2.2f seconds on CPU" % (endt - startt))
print("\n")
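# Note: unlike the GPU block below, the CPU timing above includes the one-time
# tf.function tracing cost, since there is no untimed warm-up call first.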
if GPU_avail:
    with tf.device('/GPU:0'):
        # run once untimed
        grad_obj = tfp.math.value_and_gradient(ols_loglike, [init[0], init[1]])
        startt = time.time()
        grad_obj = tfp.math.value_and_gradient(ols_loglike, [init[0], init[1]])
        endt = time.time()
        print("\nLogL calculation in %2.2f seconds on GPU" % (endt - startt))
    print("\n")
##
## NUTS (using inner step size averaging step)
##
@tf.function
def nuts_sampler(init, nsamples):
    @tf.function
    def ols_loglike(beta, sigma):
        # xb (mu_i for each observation)
        mu = tf.linalg.matvec(X, beta)
        # the normal pdf, logged and summed over all observations
        ll = - (N_/2.)*tf.math.log(2.*pi*sigma**2) - (1./(2.*sigma**2.))*tf.math.reduce_sum((Y-mu)**2., axis=-1)
        return ll
    nuts_kernel = tfp.mcmc.NoUTurnSampler(
        target_log_prob_fn=ols_loglike,
        step_size=init_step_size)
    adapt_nuts_kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
        inner_kernel=nuts_kernel,
        num_adaptation_steps=nuts_burnin,
        step_size_getter_fn=lambda pkr: pkr.step_size,
        log_accept_prob_getter_fn=lambda pkr: pkr.log_accept_ratio,
        step_size_setter_fn=lambda pkr, new_step_size: pkr._replace(step_size=new_step_size))
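    # The explicit getter/setter lambdas above are presumably needed because, in the
    # TFP release this gist targets, NoUTurnSampler's kernel results do not follow the
    # field layout that DualAveragingStepSizeAdaptation expects by default.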
    samples_nuts_, stats_nuts_ = tfp.mcmc.sample_chain(
        num_results=nsamples,
        current_state=init,
        kernel=adapt_nuts_kernel,
        num_burnin_steps=100, parallel_iterations=5)
    return samples_nuts_, stats_nuts_
with tf.device('/CPU:0'):
    startt = time.time()
    samples_nuts, stats_nuts = nuts_sampler(init, nuts_samples)
    endt = time.time()
    print("\nNuts sampling completed in %2.2f seconds on CPU" % (endt - startt))
print("\n")
if GPU_avail:
    with tf.device('/GPU:0'):
        startt = time.time()
        samples_nuts, stats_nuts = nuts_sampler(init, nuts_samples)
        endt = time.time()
        print("\nNuts sampling completed in %2.2f seconds on GPU" % (endt - startt))
    print("\n")
@tf.function
def mcmc_sampler(init_vals, nsamples):
    @tf.function
    def ols_loglike(beta, sigma):
        # xb (mu_i for each observation)
        mu = tf.linalg.matvec(X, beta)
        # the normal pdf, logged and summed over all observations
        ll = - (N_/2.)*tf.math.log(2.*pi*sigma**2) - (1./(2.*sigma**2.))*tf.math.reduce_sum((Y-mu)**2., axis=-1)
        return ll
    kernel = tfp.mcmc.RandomWalkMetropolis(
        target_log_prob_fn=ols_loglike,
        new_state_fn=tfp.mcmc.random_walk_normal_fn(scale=.04), seed=42)
    samples_, stats_ = tfp.mcmc.sample_chain(
        num_results=nsamples,
        current_state=init_vals,
        kernel=kernel,
        num_burnin_steps=nuts_burnin,
        parallel_iterations=5)
    return samples_, stats_
with tf.device('/CPU:0'):
    startt = time.time()
    samples, stats = mcmc_sampler(init, nuts_samples)
    endt = time.time()
    print("\nMHRW completed in %2.2f seconds on CPU" % (endt - startt))
print("\n")
if GPU_avail:
    with tf.device('/GPU:0'):
        startt = time.time()
        samples, stats = mcmc_sampler(init, nuts_samples)
        endt = time.time()
        print("\nMHRW completed in %2.2f seconds on GPU" % (endt - startt))
    print("\n")
trace_sigman = samples_nuts[1].numpy()
trace_betan = samples_nuts[0].numpy()
est_nuts = np.r_[trace_betan.mean(axis=0), trace_sigman.mean()]
std_nuts = np.r_[trace_betan.std(axis=0), trace_sigman.std()]
# assemble and print
print(pd.DataFrame(np.c_[est_nuts, std_nuts], columns=['estimate', 'std err'],
                   index=indexn))
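# (Not part of the original gist.) The Metropolis random-walk chain can be
# summarized the same way for a side-by-side comparison with the NUTS results;
# `samples` has the same [beta, sigma] structure as `samples_nuts`.
trace_sigma_rw = samples[1].numpy()
trace_beta_rw = samples[0].numpy()
est_rw = np.r_[trace_beta_rw.mean(axis=0), trace_sigma_rw.mean()]
std_rw = np.r_[trace_beta_rw.std(axis=0), trace_sigma_rw.std()]
print(pd.DataFrame(np.c_[est_rw, std_rw], columns=['estimate', 'std err'],
                   index=indexn))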