Skip to content

Instantly share code, notes, and snippets.

@roblem
Last active August 31, 2020 12:12
Show Gist options
  • Save roblem/cf50f4393aa337717662a2d0596e80b4 to your computer and use it in GitHub Desktop.
Save roblem/cf50f4393aa337717662a2d0596e80b4 to your computer and use it in GitHub Desktop.
ROCM 3.7 Issues
import sys
print("Running in :", sys.executable)
import tensorflow as tf
print("TF devices: ", tf.config.list_physical_devices())
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
import numpy as np
import pandas as pd
import time as time
# set tensorflow data type: single precision, passed as dtype= to the floating-point
# tf.constant tensors built from the simulated data further down
dtype = tf.float32
##
## simple OLS Data Generation Process
##
# Sample size and number of regressors (the intercept column counts as one of the K)
N = 50000
K = 500
# True beta; the first (intercept) coefficient is shifted by +3
b = np.random.randn(K)
b[0] = b[0] + 3
# True error std deviation
sigma_e = 1
# Design matrix: a column of ones followed by K-1 standard-normal regressors
x = np.c_[np.ones(N), np.random.randn(N, K - 1)]
y = x.dot(b) + sigma_e * np.random.randn(N)
# estimate parameter vector, errors, sd of errors, and se of parameters
# (X'X)^-1 feeds both the coefficients and their standard errors, so invert once and reuse it
xtx_inv = np.linalg.inv(x.T.dot(x))
bols = xtx_inv.dot(x.T.dot(y))
err = y - x.dot(bols)
# residual variance with the (N - K) degrees-of-freedom correction
sigma2_ols = err.dot(err) / (x.shape[0] - x.shape[1])
sigma_ols = np.sqrt(sigma2_ols)
se = np.sqrt(sigma2_ols * np.diagonal(xtx_inv))
# put results together for easy viewing
ols_parms = np.r_[bols, sigma_ols]
# sigma has no standard error here, hence the trailing NaN
ols_se = np.r_[se, np.nan]
print("\n")
indexn = ['b' + str(i) for i in range(K)]
indexn.append('sigma')
print(pd.DataFrame(np.c_[ols_parms, ols_se], columns=['estimate', 'std err'],
                   index=indexn))
print("\n\n")
# Lift the simulated data and the scalar constants into TensorFlow tensors.
X, Y = (tf.constant(arr, dtype=dtype) for arr in (x, y))
N_, pi = (tf.constant(val, dtype=dtype) for val in (N, np.pi))
nsamples, nburnin = (tf.constant(count, dtype=tf.int32) for count in (1000, 500))
# initialize: a random length-K starting vector for beta and 1.0 for sigma
init = [
    tf.constant(np.random.randn(K), dtype=dtype),
    tf.constant(1., dtype=dtype),
]
##
## Model Log-Likelihood/Posterior
##
@tf.function  # experimental_compile is left disabled for this variant
def ols_loglike(beta, sigma):
    """Return the normal log-likelihood of the linear model at (beta, sigma).

    Reads the module-level tensors ``X``, ``Y``, ``N_`` and ``pi``.

    Args:
        beta: coefficient vector, multiplied into ``X`` with ``tf.linalg.matvec``.
        sigma: error standard deviation.

    Returns:
        The logged normal pdf summed over the last axis of the residuals.
    """
    # xb: the mean for each observation
    fitted = tf.linalg.matvec(X, beta)
    # sum of squared residuals over all observations
    sse = tf.math.reduce_sum((Y - fitted)**2., axis=-1)
    # log of the normalising constant, scaled by N_/2
    log_norm = (N_/2.)*tf.math.log(2.*pi*sigma**2)
    inv_two_var = 1./(2.*sigma**2.)
    return -log_norm - inv_two_var*sse
@tf.function(experimental_compile=True)
def ols_loglike_XLA(beta, sigma):
    """Return the normal log-likelihood at (beta, sigma), with XLA compilation requested.

    Reads the module-level tensors ``X``, ``Y``, ``N_`` and ``pi``.

    Args:
        beta: coefficient vector, multiplied into ``X`` with ``tf.linalg.matvec``.
        sigma: error standard deviation.

    Returns:
        The logged normal pdf summed over the last axis of the residuals.
    """
    # xb: the mean for each observation
    fitted = tf.linalg.matvec(X, beta)
    # sum of squared residuals over all observations
    sse = tf.math.reduce_sum((Y - fitted)**2., axis=-1)
    # log of the normalising constant, scaled by N_/2
    log_norm = (N_/2.)*tf.math.log(2.*pi*sigma**2)
    inv_two_var = 1./(2.*sigma**2.)
    return -log_norm - inv_two_var*sse
#
# This is no xla
#
# CPU timing. ols_loglike is a tf.function, so the first call is an untimed
# warm-up (tracing happens there); only the second call is measured.
with tf.device('/CPU:0'):
    ll = ols_loglike(init[0], init[1])
    startt = time.perf_counter()
    ll = ols_loglike(init[0], init[1])
    endt = time.perf_counter()
    print("\n\nLogL calculation in %2.2f MS on CPU"% ((endt - startt)*1000))
    print("\n\n")
# GPU timing. Without an explicit device scope a CPU-only install would run
# this on the CPU and still report "on GPU", so check for a GPU and pin to it.
if tf.config.list_physical_devices('GPU'):
    try:
        with tf.device('/GPU:0'):
            ll = ols_loglike(init[0], init[1])
            startt = time.perf_counter()
            ll = ols_loglike(init[0], init[1])
            endt = time.perf_counter()
        # NOTE(review): the call may return before the GPU kernel has finished;
        # consider forcing the result (e.g. ll.numpy()) inside the timed region -- confirm.
        print("\n\nLogL calculation in %2.2f MS on GPU"% ((endt - startt)*1000))
        print("\n\n")
    except (RuntimeError, tf.errors.OpError) as exc:
        # keep the script going, but say why the GPU run failed instead of hiding it
        print("GPU run failed: %s" % exc)
else:
    print("GPU not available in this python environment")
#
# This is xla
#
# CPU timing. The first call is an untimed warm-up (tracing and XLA
# compilation happen there); only the second call is measured.
with tf.device('/CPU:0'):
    ll = ols_loglike_XLA(init[0], init[1])
    startt = time.perf_counter()
    ll = ols_loglike_XLA(init[0], init[1])
    endt = time.perf_counter()
    print("\n\nLogL calculation in %2.2f MS on CPU (XLA)"% ((endt - startt)*1000))
    print("\n\n")
# GPU timing. Without an explicit device scope a CPU-only install would run
# this on the CPU and still report "on GPU", so check for a GPU and pin to it.
if tf.config.list_physical_devices('GPU'):
    try:
        with tf.device('/GPU:0'):
            ll = ols_loglike_XLA(init[0], init[1])
            startt = time.perf_counter()
            ll = ols_loglike_XLA(init[0], init[1])
            endt = time.perf_counter()
        # NOTE(review): the call may return before the GPU kernel has finished;
        # consider forcing the result (e.g. ll.numpy()) inside the timed region -- confirm.
        # The "(XLA)" tag matches the CPU line; the original GPU message omitted it.
        print("\n\nLogL calculation in %2.2f MS on GPU (XLA)"% ((endt - startt)*1000))
        print("\n\n")
    except (RuntimeError, tf.errors.OpError) as exc:
        # keep the script going, but say why the GPU run failed instead of hiding it
        print("GPU (XLA) run failed: %s" % exc)
else:
    print("GPU not available in this python environment")
import sys
print("Running in :", sys.executable)
import tensorflow as tf
# List the devices TensorFlow reports and record whether any of them is a GPU.
devs = tf.config.list_physical_devices()
# last field of every device entry (presumably the device type -- confirm)
devs_l = [dev[-1] for dev in devs]
print("TF devices: ", devs)
# substring match, so any entry whose last field contains "GPU" counts
GPU_avail = any("GPU" in kind for kind in devs_l)
print("Is GPU Available? ", GPU_avail)
import tensorflow_probability as tfp
import numpy as np
import pandas as pd
import timeit
# dtype handed to the floating-point tf.constant tensors created below
dtype = tf.float32
# side length of the square random matrix A used by the linalg timings
linalgsize = 1000
# total row count; divided by the number of sites to size the gather/scatter data
rowsize = 250000
def run_test(command, imports):
    """Time ``command`` on the CPU and, when ``GPU_avail`` is true, on the GPU.

    Args:
        command: statement string given to ``timeit.Timer`` as the code to time.
        imports: setup string given to ``timeit.Timer``.

    Prints, per device, the fastest of the repeated single-execution timings
    in milliseconds. Returns nothing.
    """
    # (device string, report template) pairs; the GPU entry is added only when one was detected
    targets = [('/CPU:0', "\nCommand %s took %2.4f milliseconds on cpu")]
    if GPU_avail:
        targets.append(('/GPU:0', "\nCommand %s took %2.4f milliseconds on gpu"))
    for device, template in targets:
        with tf.device(device):
            timer = timeit.Timer(command, imports)
            # every repeat runs the command exactly once; report the quickest
            fastest = min(timer.repeat(number=1))
            print(template % (command, fastest*1000))
            print("\n")
# Square uniform-random matrix shared by the two linear-algebra timings.
A = tf.constant(np.random.rand(linalgsize, linalgsize), dtype=dtype)
_linalg_setup = "from __main__ import tf, timeit, A, GPU_avail"
# matrix inverse first, then A'A
for _linalg_cmd in ("tf.linalg.inv(A)", "tf.linalg.matmul(tf.transpose(A),A)"):
    run_test(_linalg_cmd, _linalg_setup)
##
## gather and scatter tests
##
# Builds the index tensors and inputs that the gather_nd / scatter_nd timings use.
# number of sites
sites = 10
# rowsize split evenly across the sites
t = int(rowsize/sites)
# vector to scatter: one standard-normal draw per (n, s, t_) triple enumerated below (5*t*sites values)
scatvec = tf.constant(np.random.randn(5*t*sites), dtype=dtype)
# NOTE(review): X is not referenced again in the code visible here -- confirm it is still needed
X = tf.constant(np.random.randn(5*t*sites,5), dtype=dtype)
# enumerate every (n, s, t_) combination (n varies slowest, s fastest); 5*t*sites rows in total.
# NOTE(review): the indices are enumerated, not random, and the scatter target below has
# scatvec.shape[0] rows and 5 columns.
idx_all = [[n, s, t_] for n in range(5) for t_ in range(t) for s in range(sites)]
df_idx = pd.DataFrame(idx_all, columns=['id_n', 'id_s', 'id_t'])
# one row per distinct (id_s, id_t) pair; the count is only a by-product of the groupby and is dropped
time_site_idx = df_idx.groupby(['id_s','id_t'])['id_n'].count().reset_index().drop(columns=['id_n']).copy()
# number the (id_s, id_t) pairs: the positional index becomes the id_ts column
df_ts_idx = time_site_idx.reset_index(drop=False).rename(columns={'index':'id_ts'})
# attach id_ts to every row of df_idx
df_idx = df_idx.merge(df_ts_idx, on=['id_s','id_t'], how='left')
# self-join on id_ts, then pivot: for each row of df_idx (index_x) there is one column per id_n
# holding the row position (index_y) of the entry with that id_n in the same id_ts group
good_index = df_idx['id_ts'].reset_index().merge(df_idx[['id_ts','id_n']].reset_index(), on=['id_ts']).pivot(index='index_x', columns='id_n', values='index_y')
good_index.columns=[str(i) for i in range(5)]
# NOTE(review): despite its name this column holds the pivot's row index (index_x),
# not the id_ts group number -- confirm that is intended
good_index['id_ts'] = good_index.index.values
# create 2 indices for tensor ops below
# (el[i] == el[i] is False only for NaN, so empty pivot cells are skipped)
# ts_idx: [row, column] targets for scatter_nd; the row comes from the last column of good_index
ts_idx = [[int(el[-1]),i] for el in good_index.values.tolist() for i in range(5) if el[i] == el[i]]
# ts_idx_1: positions in scatvec to gather, listed in the same order as ts_idx
ts_idx_1 = [[int(el[i])] for el in good_index.values.tolist() for i in range(5) if el[i] == el[i]]
ts_idx_ = tf.constant(ts_idx, dtype=tf.int32)
ts_idx_1_ = tf.constant(ts_idx_1, dtype=tf.int32)
# pick the selected scatvec entries, then scatter them into a (scatvec.shape[0], 5) tensor
scatvec_expanded = tf.gather_nd(scatvec, ts_idx_1_)
scat_vec_n = tf.scatter_nd(ts_idx_, scatvec_expanded, (scatvec.shape[0],5))
# 5 x 5 weights matrix: zero on the diagonal, 0.1 everywhere else.
# The last row used to repeat the fourth row ([.1, .1, .1, 0., .1]); its zero
# now sits on the diagonal like every other row.
W = tf.constant(np.array([[0., .1, .1, .1, .1],
                          [.1, 0., .1, .1, .1],
                          [.1, .1, 0., .1, .1],
                          [.1, .1, .1, 0., .1],
                          [.1, .1, .1, .1, 0.]]), dtype=dtype)
# one single-element index per row of df_idx, taken from its id_n column;
# gather_nd then pulls the matching row of W for each of them
id_n = [[n_val] for n_val in df_idx.id_n.values.tolist()]
n_idx = tf.constant(id_n, dtype=tf.int32)
W_ = tf.gather_nd(W, n_idx)
# time the gather / scatter / gather ops built above
run_test(
    "tf.gather_nd(scatvec, ts_idx_1_)",
    "from __main__ import tf, timeit, ts_idx_1_, scatvec, GPU_avail",
)
run_test(
    "tf.scatter_nd(ts_idx_, scatvec_expanded, (scatvec.shape[0],5))",
    "from __main__ import tf, timeit, ts_idx_, scatvec_expanded, scatvec, GPU_avail",
)
run_test(
    "tf.gather_nd(W, n_idx)",
    "from __main__ import tf, timeit, n_idx, W, GPU_avail",
)
##
## bincount test
##
# weighted bincount: scatvec is passed as the weights, binned by the id_t column
t_idx = tf.constant(df_idx['id_t'], dtype=tf.int32)
sum_t = tf.math.bincount(t_idx, weights=scatvec)
run_test(
    "tf.math.bincount(t_idx,weights=scatvec)",
    "from __main__ import tf, timeit, t_idx, scatvec, GPU_avail",
)
###
### multiply, reduce, and add ops
###
# build the operands once up front, then time each op through run_test
elem_mult = tf.multiply(W_, scat_vec_n)
elem_mult_reduce = tf.reduce_sum(elem_mult)
# elementwise product
run_test(
    "tf.multiply(W_, scat_vec_n)",
    "from __main__ import tf, timeit, W_, scat_vec_n, GPU_avail",
)
# full reduction of the product
run_test(
    "tf.reduce_sum(elem_mult)",
    "from __main__ import tf, timeit, elem_mult, GPU_avail",
)
# add the reduced value to scatvec
run_test(
    "scatvec + elem_mult_reduce",
    "from __main__ import tf, timeit, scatvec, elem_mult_reduce, GPU_avail",
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment