Last active
August 31, 2020 12:12
-
-
Save roblem/cf50f4393aa337717662a2d0596e80b4 to your computer and use it in GitHub Desktop.
ROCM 3.7 Issues
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
print("Running in :", sys.executable) | |
import tensorflow as tf | |
print("TF devices: ", tf.config.list_physical_devices()) | |
import tensorflow_probability as tfp | |
from tensorflow_probability import distributions as tfd | |
import numpy as np | |
import pandas as pd | |
import time as time | |
# TensorFlow dtype used for every tensor built from the simulated data
dtype = tf.float32

##
## simple OLS Data Generation Process
##
# Sample size and number of regressors (including the intercept column)
N = 50000
K = 500
# True beta; the intercept is shifted by +3
b = np.random.randn(K)
b[0] += 3
# True error std deviation
sigma_e = 1
# Design matrix: a column of ones followed by K-1 standard-normal regressors
x = np.c_[np.ones(N), np.random.randn(N, K - 1)]
y = x.dot(b) + sigma_e * np.random.randn(N)

# estimate parameter vector, errors, sd of errors, and se of parameters
# (X'X)^-1 is needed for both the coefficients and their standard errors,
# so it is formed once and reused instead of being recomputed.
xtx_inv = np.linalg.inv(x.T.dot(x))
bols = xtx_inv.dot(x.T.dot(y))
err = y - x.dot(bols)
# Residual variance with the degrees-of-freedom correction N - K
resid_dof = x.shape[0] - x.shape[1]
sigma2_ols = err.dot(err) / resid_dof
sigma_ols = np.sqrt(sigma2_ols)
se = np.sqrt(sigma2_ols * np.diagonal(xtx_inv))

# put results together for easy viewing
ols_parms = np.r_[bols, sigma_ols]
# no standard error is computed for sigma, hence the trailing NaN
ols_se = np.r_[se, np.nan]
print("\n")
indexn = ['b' + str(i) for i in range(K)]
indexn.append('sigma')
print(pd.DataFrame(np.c_[ols_parms, ols_se], columns=['estimate', 'std err'],
                   index=indexn))
print("\n\n")
# Simulated data and scalar constants as tensors of the working dtype;
# ols_loglike / ols_loglike_XLA read X, Y, N_ and pi as globals.
X = tf.constant(x, dtype=dtype)
Y = tf.constant(y, dtype=dtype)
N_ = tf.constant(N, dtype=dtype)
pi = tf.constant(np.pi, dtype=dtype)

# Sample / burn-in counts (not referenced by the timing code visible here)
nsamples = tf.constant(1000, dtype=tf.int32)
nburnin = tf.constant(500, dtype=tf.int32)

# initialize: random coefficient vector of length K and a unit sigma
beta_init = tf.constant(np.random.randn(K), dtype=dtype)
sigma_init = tf.constant(1., dtype=dtype)
init = [beta_init, sigma_init]
## | |
## Model Log-Likelihood/Posterior | |
## | |
@tf.function  # XLA compilation is left off here; see ols_loglike_XLA
def ols_loglike(beta, sigma):
    """Return the normal log-likelihood of the linear model, summed over rows.

    Reads the module-level tensors ``X``, ``Y``, ``N_`` and ``pi``.

    Args:
        beta: coefficient vector multiplied into ``X`` with ``matvec``.
        sigma: error standard deviation; it enters only as ``sigma**2``.

    Returns:
        The summed log-density as a tensor.
    """
    # xb (mu_i for each observation)
    fitted = tf.linalg.matvec(X, beta)
    sum_sq_resid = tf.math.reduce_sum((Y - fitted) ** 2., axis=-1)
    variance = sigma ** 2.
    # normal pdf, logged and summed: normalising term minus quadratic term
    log_norm = (N_ / 2.) * tf.math.log(2. * pi * variance)
    quadratic = (1. / (2. * variance)) * sum_sq_resid
    return -log_norm - quadratic
@tf.function(experimental_compile=True)
def ols_loglike_XLA(beta, sigma):
    """XLA-compiled version of the summed normal log-likelihood.

    Performs the same arithmetic as ``ols_loglike``; only the decorator
    differs. Reads the module-level tensors ``X``, ``Y``, ``N_`` and ``pi``.

    Args:
        beta: coefficient vector multiplied into ``X`` with ``matvec``.
        sigma: error standard deviation; it enters only as ``sigma**2``.

    Returns:
        The summed log-density as a tensor.
    """
    var = sigma ** 2.
    # residuals around xb (mu_i for each observation)
    resid = Y - tf.linalg.matvec(X, beta)
    sse = tf.math.reduce_sum(resid ** 2., axis=-1)
    # normal pdf logged and summed over all observations
    return -(N_ / 2.) * tf.math.log(2. * pi * var) - (1. / (2. * var)) * sse
#
# Timing of the log-likelihood: plain tf.function first, then the XLA variant
#
def _time_loglike(loglike_fn, label):
    """Time one warmed-up call of ``loglike_fn(init[0], init[1])`` and print it.

    Args:
        loglike_fn: callable taking ``(beta, sigma)`` and returning a tensor.
        label: device description appended to the printed message.

    Returns:
        The tensor produced by the timed call.
    """
    # Untimed first call, so one-off work on the first invocation
    # (tf.function tracing, XLA compilation) stays out of the measurement.
    loglike_fn(init[0], init[1])
    startt = time.perf_counter()
    ll = loglike_fn(init[0], init[1])
    # Fetch the value to the host before stopping the clock, in case the
    # device work was dispatched asynchronously.
    ll.numpy()
    endt = time.perf_counter()
    print("\n\nLogL calculation in %2.2f MS on %s" % ((endt - startt) * 1000, label))
    print("\n\n")
    return ll


def _benchmark_loglike(loglike_fn, suffix=""):
    """Time ``loglike_fn`` on the CPU and, when one is visible, on the GPU.

    Args:
        loglike_fn: callable taking ``(beta, sigma)`` and returning a tensor.
        suffix: text appended to the device name in the report, e.g. " (XLA)".

    Returns:
        The tensor from the last timing run that succeeded.
    """
    with tf.device('/CPU:0'):
        result = _time_loglike(loglike_fn, "CPU" + suffix)

    # Only report a GPU figure when TensorFlow actually sees a GPU; otherwise
    # the call would run on the CPU and be mislabelled.
    if not tf.config.list_physical_devices('GPU'):
        print("GPU not available in this python environment")
        return result

    try:
        with tf.device('/GPU:0'):
            result = _time_loglike(loglike_fn, "GPU" + suffix)
    except Exception as exc:
        # Best-effort benchmark: report the failure and carry on with the
        # remaining timings instead of aborting the script.
        print("GPU run failed: %s" % (exc,))
    return result


#
# This is no xla
#
ll = _benchmark_loglike(ols_loglike)
#
# This is xla
#
ll = _benchmark_loglike(ols_loglike_XLA, " (XLA)")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
print("Running in :", sys.executable) | |
import tensorflow as tf | |
devs = tf.config.list_physical_devices()
# Keep only the device-type string of each physical device
devs_l = [dev.device_type for dev in devs]
print("TF devices: ", devs)
# Substring test: any device type containing "GPU" counts as a GPU
GPU_avail = any("GPU" in s for s in devs_l)
print("Is GPU Available? ", GPU_avail)
import tensorflow_probability as tfp | |
import numpy as np | |
import pandas as pd | |
import timeit | |
# Side length of the square matrix used by the linalg benchmarks
linalgsize = 1000
# Row count from which the gather/scatter panel dimensions are derived
rowsize = 250000
# dtype shared by all floating-point benchmark tensors
dtype = tf.float32
def run_test(command, imports):
    """Time ``command`` with timeit on the CPU and, if available, the GPU.

    For each device the statement is run via ``timeit.Timer.repeat`` with
    ``number=1`` and the fastest repetition is printed in milliseconds.

    Args:
        command: source string of the statement to time.
        imports: setup string handed to ``timeit.Timer`` (used here to
            import the needed names from ``__main__``).
    """
    # NOTE(review): the timed statement's result is never fetched; if the
    # device executes asynchronously the figure may under-report -- confirm.
    reports = [('/CPU:0', "\nCommand %s took %2.4f milliseconds on cpu")]
    if GPU_avail:
        reports.append(('/GPU:0', "\nCommand %s took %2.4f milliseconds on gpu"))

    for device_name, message in reports:
        with tf.device(device_name):
            timer = timeit.Timer(command, imports)
            fastest = min(timer.repeat(number=1))
            print(message % (command, fastest * 1000))
            print("\n")
# Random square matrix shared by the dense linear-algebra benchmarks
A = tf.constant(np.random.rand(linalgsize, linalgsize), dtype=dtype)
# Both statements need the same names from __main__: inverse, then A'A
_linalg_setup = "from __main__ import tf, timeit, A, GPU_avail"
for _linalg_cmd in ("tf.linalg.inv(A)", "tf.linalg.matmul(tf.transpose(A),A)"):
    run_test(_linalg_cmd, _linalg_setup)
##
## gather and scatter tests
##
# Panel dimensions: `sites` sites, `t` time periods, and 5 values of id_n
sites = 10
t = rowsize // sites
# vector to scatter, plus a matrix with the same number of rows
scatvec = tf.constant(np.random.randn(5 * t * sites), dtype=dtype)
X = tf.constant(np.random.randn(5 * t * sites, 5), dtype=dtype)

# build indices to project scatvec into a (5*t*sites) x 5 matrix (with copies)
idx_all = [[n, s, t_] for n in range(5) for t_ in range(t) for s in range(sites)]
df_idx = pd.DataFrame(idx_all, columns=['id_n', 'id_s', 'id_t'])
# one row per (site, time) pair; its position becomes the pair id `id_ts`
time_site_idx = (
    df_idx.groupby(['id_s', 'id_t'])['id_n']
    .count()
    .reset_index()
    .drop(columns=['id_n'])
    .copy()
)
df_ts_idx = time_site_idx.reset_index(drop=False).rename(columns={'index': 'id_ts'})
df_idx = df_idx.merge(df_ts_idx, on=['id_s', 'id_t'], how='left')
# For every row (index_x), list the rows (index_y) that share its id_ts,
# laid out in one column per id_n.
good_index = (
    df_idx['id_ts'].reset_index()
    .merge(df_idx[['id_ts', 'id_n']].reset_index(), on=['id_ts'])
    .pivot(index='index_x', columns='id_n', values='index_y')
)
good_index.columns = [str(i) for i in range(5)]
# NOTE: despite its name this column stores the row position (index_x),
# which is used below as the destination row for scatter_nd.
good_index['id_ts'] = good_index.index.values

# create 2 indices for tensor ops below
# Materialise the table once; `el[i] == el[i]` is False only for NaN, so
# missing cells of the pivot are skipped.
_index_rows = good_index.values.tolist()
ts_idx = [[int(el[-1]), i] for el in _index_rows for i in range(5) if el[i] == el[i]]
ts_idx_1 = [[int(el[i])] for el in _index_rows for i in range(5) if el[i] == el[i]]
del _index_rows  # large temporary, no longer needed
ts_idx_ = tf.constant(ts_idx, dtype=tf.int32)
ts_idx_1_ = tf.constant(ts_idx_1, dtype=tf.int32)
scatvec_expanded = tf.gather_nd(scatvec, ts_idx_1_)
scat_vec_n = tf.scatter_nd(ts_idx_, scatvec_expanded, (scatvec.shape[0], 5))

# 5 x 5 weight matrix: 0 on the diagonal, 0.1 elsewhere.  The last row used
# to repeat the fourth row, which put its zero off the diagonal.
W = tf.constant(np.array([[0., .1, .1, .1, .1],
                          [.1, 0., .1, .1, .1],
                          [.1, .1, 0., .1, .1],
                          [.1, .1, .1, 0., .1],
                          [.1, .1, .1, .1, 0.]]), dtype=dtype)
# pick, for each row of df_idx, the row of W that belongs to its id_n
id_n = [[el] for el in df_idx.id_n.values.tolist()]
n_idx = tf.constant(id_n, dtype=tf.int32)
W_ = tf.gather_nd(W, n_idx)

run_test("tf.gather_nd(scatvec, ts_idx_1_)", "from __main__ import tf, timeit, ts_idx_1_, scatvec, GPU_avail")
run_test("tf.scatter_nd(ts_idx_, scatvec_expanded, (scatvec.shape[0],5))", "from __main__ import tf, timeit, ts_idx_, scatvec_expanded, scatvec, GPU_avail")
run_test("tf.gather_nd(W, n_idx)", "from __main__ import tf, timeit, n_idx, W, GPU_avail")
##
## bincount test
##
# Time-period id of every row, used as the bin index
t_idx = tf.constant(df_idx.id_t, dtype=tf.int32)
# Weighted bincount: accumulates scatvec within each id_t bin
sum_t = tf.math.bincount(t_idx, weights=scatvec)
_bincount_cmd = "tf.math.bincount(t_idx,weights=scatvec)"
_bincount_setup = "from __main__ import tf, timeit, t_idx, scatvec, GPU_avail"
run_test(_bincount_cmd, _bincount_setup)
###
### multiply, reduce, and add ops
###
# Operands for the timed statements are built up front; building them
# prints nothing, so the benchmark output order is unaffected.
elem_mult = tf.multiply(W_, scat_vec_n)
elem_mult_reduce = tf.reduce_sum(elem_mult)

# (statement, setup) pairs, timed in this order
_elementwise_cases = (
    ("tf.multiply(W_, scat_vec_n)",
     "from __main__ import tf, timeit, W_, scat_vec_n, GPU_avail"),
    ("tf.reduce_sum(elem_mult)",
     "from __main__ import tf, timeit, elem_mult, GPU_avail"),
    ("scatvec + elem_mult_reduce",
     "from __main__ import tf, timeit, scatvec, elem_mult_reduce, GPU_avail"),
)
for _case_cmd, _case_setup in _elementwise_cases:
    run_test(_case_cmd, _case_setup)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment