roblem/tensorflow_functions.py

## tensorflow_functions.py
import sys
print("Running in :", sys.executable)

import tensorflow as tf
print("TF devices: ", tf.config.list_physical_devices())

import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
import numpy as np
import pandas as pd
import time as time

# TensorFlow dtype used for every tensor created in this script.
dtype = tf.float32

##
## simple OLS Data Generation Process
##
# Number of observations (N) and of regressors including the intercept (K).
N = 50000
K = 500
# True beta; the first coefficient is shifted up by 3.
b = np.random.randn(K)
b[0] = b[0] + 3
# True error std deviation
sigma_e = 1

# Design matrix: a column of ones followed by K-1 standard-normal columns.
x = np.c_[np.ones(N), np.random.randn(N,K-1)]
y = x.dot(b) + sigma_e * np.random.randn(N)

# estimate parameter vector, errors, sd of errors, and se of parameters
# (X'X)^-1 feeds both the coefficients and their standard errors, so it is
# inverted once and reused instead of being recomputed.
xtx_inv = np.linalg.inv(x.T.dot(x))
bols = xtx_inv.dot(x.T.dot(y))
err = y - x.dot(bols)
# Residual variance using N - K degrees of freedom; shared by sigma and se.
resid_var = err.dot(err)/(x.shape[0] - x.shape[1])
sigma_ols = np.sqrt(resid_var)
se = np.sqrt(resid_var * np.diagonal(xtx_inv))
# put results together for easy viewing (sigma has no std err, hence the NaN)
ols_parms = np.r_[bols, sigma_ols]
ols_se = np.r_[se, np.nan]
print("\n")
indexn = ['b'+str(i) for i in range(K)] + ['sigma']
print(pd.DataFrame(np.c_[ols_parms, ols_se],columns=['estimate', 'std err'],
      index=indexn))
print("\n\n")

# Tensor copies of the data and constants read by the log-likelihood functions.
X = tf.constant(x, dtype=dtype)
Y = tf.constant(y, dtype=dtype)
N_ = tf.constant(N, dtype=dtype)
pi = tf.constant(np.pi, dtype=dtype)

# NOTE(review): nsamples/nburnin are not read anywhere in the visible code;
# kept because other code may import them.
nsamples =  tf.constant(1000, dtype=tf.int32)
nburnin = tf.constant(500, dtype=tf.int32)

# initialize: random starting beta and a starting sigma of 1
init = [tf.constant(np.random.randn(K), dtype=dtype), tf.constant(1., dtype=dtype)]

##
## Model Log-Likelihood/Posterior
##
@tf.function#(experimental_compile=True)
def ols_loglike(beta, sigma):
    """Gaussian log-likelihood of the linear model, summed over observations.

    Reads the module-level tensors ``X``, ``Y``, ``N_`` and ``pi``.

    Args:
        beta: coefficient tensor multiplied against ``X`` via ``matvec``.
        sigma: error standard deviation.

    Returns:
        The logged normal pdf summed over the last axis of the residuals.
    """
    # Fitted values (mu_i for each observation).
    fitted = tf.linalg.matvec(X, beta)
    # Sum of squared residuals over the last axis.
    sq_resid_sum = tf.math.reduce_sum((Y-fitted)**2., axis=-1)
    # Term from the normalising constant of the normal pdf.
    const_term = - (N_/2.)*tf.math.log(2.*pi*sigma**2)
    # Scale applied to the squared residuals inside the exponent.
    inv_two_var = 1./(2.*sigma**2.)
    return const_term - inv_two_var*sq_resid_sum

@tf.function(experimental_compile=True)
def ols_loglike_XLA(beta, sigma):
    """XLA-compiled variant of the Gaussian OLS log-likelihood.

    Same arithmetic as the non-XLA function; only the ``tf.function``
    decorator options differ. Reads module-level ``X``, ``Y``, ``N_``, ``pi``.

    Args:
        beta: coefficient tensor multiplied against ``X`` via ``matvec``.
        sigma: error standard deviation.

    Returns:
        The logged normal pdf summed over the last axis of the residuals.
    """
    # Residuals: observed Y minus xb.
    resid = Y - tf.linalg.matvec(X, beta)
    half_n = N_/2.
    ss_resid = tf.math.reduce_sum(resid**2., axis=-1)
    # Normal pdf, logged and summed over all observations.
    ll = - half_n*tf.math.log(2.*pi*sigma**2) - (1./(2.*sigma**2.))*ss_resid
    return ll

#
# This is no xla
#
# Query GPUs explicitly: without a device scope the "GPU" timings would run on
# TensorFlow's default device and report a GPU time even on CPU-only machines.
gpu_devices = tf.config.list_physical_devices('GPU')

with tf.device('/CPU:0'):
    # Untimed first call so the tf.function is already traced when timing starts.
    ll = ols_loglike(init[0], init[1])
    startt = time.perf_counter()
    ll = ols_loglike(init[0], init[1])
    endt = time.perf_counter()
print("\n\nLogL calculation in %2.2f MS on CPU"% ((endt - startt)*1000))
print("\n\n")

if gpu_devices:
    try:
        with tf.device('/GPU:0'):
            ll = ols_loglike(init[0], init[1])
            startt = time.perf_counter()
            ll = ols_loglike(init[0], init[1])
            endt = time.perf_counter()
        print("\n\nLogL calculation in %2.2f MS on GPU"% ((endt - startt)*1000))
        print("\n\n")
    except (RuntimeError, tf.errors.OpError) as gpu_err:
        # Keep the benchmark best-effort, but say why the GPU run failed.
        print("GPU not available in this python environment")
        print("GPU run failed with: %s" % gpu_err)
else:
    print("GPU not available in this python environment")

#
# This is xla
#
with tf.device('/CPU:0'):
    # Untimed first call: tracing/compilation happens here, not in the timing.
    ll = ols_loglike_XLA(init[0], init[1])
    startt = time.perf_counter()
    ll = ols_loglike_XLA(init[0], init[1])
    endt = time.perf_counter()
print("\n\nLogL calculation in %2.2f MS on CPU (XLA)"% ((endt - startt)*1000))
print("\n\n")

if gpu_devices:
    try:
        with tf.device('/GPU:0'):
            ll = ols_loglike_XLA(init[0], init[1])
            startt = time.perf_counter()
            ll = ols_loglike_XLA(init[0], init[1])
            endt = time.perf_counter()
        # Labelled "(XLA)" to match the CPU message and to distinguish it
        # from the non-XLA GPU timing printed earlier.
        print("\n\nLogL calculation in %2.2f MS on GPU (XLA)"% ((endt - startt)*1000))
        print("\n\n")
    except (RuntimeError, tf.errors.OpError) as gpu_err:
        print("GPU not available in this python environment")
        print("GPU run failed with: %s" % gpu_err)
else:
    print("GPU not available in this python environment")

## tensorflow_ops.py
import sys
print("Running in :", sys.executable)

import tensorflow as tf
# Every physical device TensorFlow reports in this environment.
devs = tf.config.list_physical_devices()
# Last field of each device entry (presumably the device type - confirm).
devs_l = [dev[-1] for dev in devs]
print("TF devices: ", devs)
# A GPU counts as available when any entry's last field contains "GPU".
GPU_avail = any("GPU" in dev_kind for dev_kind in devs_l)

print("Is GPU Available? ", GPU_avail)

import tensorflow_probability as tfp
import numpy as np
import pandas as pd
import timeit

# dtype used for every floating-point tensor built in this script
dtype = tf.float32

# side length of the square matrix A used by the linear-algebra timings
linalgsize = 1000
# row budget; divided by `sites` further down to get the period count `t`
rowsize = 250000

def run_test(command, imports):
    """Time ``command`` with ``timeit`` on the CPU and, when available, the GPU.

    Args:
        command: statement string handed to ``timeit.Timer`` as the code to time.
        imports: setup string handed to ``timeit.Timer``; it must bring every
            name used by ``command`` into scope.

    Prints, for each device, the fastest single execution in milliseconds
    (minimum over ``Timer.repeat`` with ``number=1``). Returns nothing.
    """
    # Always benchmark the CPU; add the GPU only when one was detected.
    targets = [('/CPU:0', 'cpu')]
    if GPU_avail:
        targets.append(('/GPU:0', 'gpu'))

    for device, label in targets:
        # The timed run itself must happen inside the device scope: creating
        # the Timer there is not enough, nothing executes until repeat().
        with tf.device(device):
            t = timeit.Timer(command, imports)
            best = min(t.repeat(number=1))
        print("\nCommand %s took %2.4f milliseconds on %s"% (command, best*1000, label))
        print("\n")


# Dense square matrix (uniform random entries) shared by the linalg timings.
A = tf.constant(np.random.rand(linalgsize,linalgsize), dtype=dtype)


# Both statements need the same names in scope, so they share one setup string.
_linalg_setup = "from __main__ import tf, timeit, A, GPU_avail"
for _linalg_cmd in ("tf.linalg.inv(A)", "tf.linalg.matmul(tf.transpose(A),A)"):
    run_test(_linalg_cmd, _linalg_setup)

##
## gather and scatter tests
##

# vector to scatter
# `sites` locations and `t` time periods; 5 units (id_n) are observed for
# every (site, time) pair, giving 5*t*sites rows in total.
sites = 10
t = int(rowsize/sites)
scatvec = tf.constant(np.random.randn(5*t*sites), dtype=dtype)
# NOTE(review): X is not referenced again in the visible part of this script.
X = tf.constant(np.random.randn(5*t*sites,5), dtype=dtype)
# Enumerate every [n, s, t_] triple (n outermost, then t_, then s).
# NOTE(review): an earlier comment described these indices as random and the
# target as 250000 x 5; the list is a deterministic enumeration and the
# scatter target below has scatvec.shape[0] rows - confirm the intent.
idx_all = [[n, s, t_] for n in range(5) for t_ in range(t) for s in range(sites)]

df_idx = pd.DataFrame(idx_all, columns=['id_n', 'id_s', 'id_t'])
# One row per distinct (id_s, id_t) pair; the count is only a device to
# collapse the groups and is dropped straight away.
time_site_idx = df_idx.groupby(['id_s','id_t'])['id_n'].count().reset_index().drop(columns=['id_n']).copy()
# Promote the positional index to an explicit site-time id column 'id_ts'.
df_ts_idx = time_site_idx.reset_index(drop=False).rename(columns={'index':'id_ts'})

# Attach the site-time id to every observation row.
df_idx = df_idx.merge(df_ts_idx, on=['id_s','id_t'], how='left')

# Self-merge on id_ts pairs each row (index_x) with every row (index_y) that
# shares its site-time id; the pivot then gives, per row, one column per id_n
# holding the row position of that unit within the same site-time group.
good_index = df_idx['id_ts'].reset_index().merge(df_idx[['id_ts','id_n']].reset_index(), on=['id_ts']).pivot(index='index_x', columns='id_n', values='index_y')
good_index.columns=[str(i) for i in range(5)]
# NOTE(review): despite its name this column is filled with the pivot index
# (the original row positions, index_x), not the id_ts values.
good_index['id_ts'] = good_index.index.values

# create 2 indices for tensor ops below
# `el[i] == el[i]` is False only for NaN, so missing pivot cells are skipped.
# ts_idx holds [row, column] scatter targets (el[-1] is the last column set above).
ts_idx = [[int(el[-1]),i] for el in good_index.values.tolist() for i in range(5) if el[i] == el[i]]
# ts_idx_1 holds the matching source positions in scatvec, one per target.
ts_idx_1 = [[int(el[i])] for el in good_index.values.tolist() for i in range(5) if el[i] == el[i]]

ts_idx_ = tf.constant(ts_idx, dtype=tf.int32)
ts_idx_1_ = tf.constant(ts_idx_1, dtype=tf.int32)

# Gather the source values, then scatter them into a (len(scatvec), 5) matrix.
scatvec_expanded = tf.gather_nd(scatvec, ts_idx_1_)
scat_vec_n = tf.scatter_nd(ts_idx_, scatvec_expanded, (scatvec.shape[0],5))

# 5 x 5 weight matrix: 0 on the diagonal, 0.1 everywhere else.
# Built from the identity so every row follows the same pattern; the
# hand-typed literal had its last row as a copy of the fourth
# ([.1, .1, .1, 0., .1]) instead of [.1, .1, .1, .1, 0.].
W = tf.constant(.1 * (1. - np.eye(5)), dtype=dtype)
# One single-element index per observation row, taken from the id_n column.
id_n = [[el] for el in df_idx.id_n.values.tolist()]


n_idx = tf.constant(id_n, dtype=tf.int32)
# Each index has length 1, so gather_nd selects row id_n of W per observation.
W_ = tf.gather_nd(W, n_idx)

# (statement, setup) pairs for the gather/scatter timings, run in order.
_gather_scatter_cases = (
    ("tf.gather_nd(scatvec, ts_idx_1_)",
     "from __main__ import tf, timeit, ts_idx_1_, scatvec, GPU_avail"),
    ("tf.scatter_nd(ts_idx_, scatvec_expanded, (scatvec.shape[0],5))",
     "from __main__ import tf, timeit, ts_idx_, scatvec_expanded, scatvec, GPU_avail"),
    ("tf.gather_nd(W, n_idx)",
     "from __main__ import tf, timeit, n_idx, W, GPU_avail"),
)
for _gs_stmt, _gs_setup in _gather_scatter_cases:
    run_test(_gs_stmt, _gs_setup)

##
## bincount test
##
# id_t of every observation row as an int32 tensor.
t_idx = tf.constant(df_idx['id_t'], dtype=tf.int32)

# Weighted bincount: accumulates the scatvec entries that share an id_t.
sum_t = tf.math.bincount(t_idx,weights=scatvec)

_bincount_stmt = "tf.math.bincount(t_idx,weights=scatvec)"
_bincount_setup = "from __main__ import tf, timeit, t_idx, scatvec, GPU_avail"
run_test(_bincount_stmt, _bincount_setup)


###
### multiply, reduce, and add ops
###
# Operands for the timings below: the element-wise product of the gathered
# weights with the scattered matrix, and the sum of all its entries.
elem_mult = tf.multiply(W_, scat_vec_n)
elem_mult_reduce = tf.reduce_sum(elem_mult)

# (statement, setup) pairs, timed in this order: multiply, reduce, add.
_arith_cases = (
    ("tf.multiply(W_, scat_vec_n)",
     "from __main__ import tf, timeit, W_, scat_vec_n, GPU_avail"),
    ("tf.reduce_sum(elem_mult)",
     "from __main__ import tf, timeit, elem_mult, GPU_avail"),
    ("scatvec + elem_mult_reduce",
     "from __main__ import tf, timeit, scatvec, elem_mult_reduce, GPU_avail"),
)
for _arith_stmt, _arith_setup in _arith_cases:
    run_test(_arith_stmt, _arith_setup)
	import sys
	print("Running in :", sys.executable)

	import tensorflow as tf
	print("TF devices: ", tf.config.list_physical_devices())

	import tensorflow_probability as tfp
	from tensorflow_probability import distributions as tfd
	import numpy as np
	import pandas as pd
	import time as time

	# TensorFlow dtype used for every tensor created in this script.
	dtype = tf.float32

	##
	## simple OLS Data Generation Process
	##
	# Number of observations (N) and of regressors including the intercept (K).
	N = 50000
	K = 500
	# True beta; the first coefficient is shifted up by 3.
	b = np.random.randn(K)
	b[0] = b[0] + 3
	# True error std deviation
	sigma_e = 1

	# Design matrix: a column of ones followed by K-1 standard-normal columns.
	x = np.c_[np.ones(N), np.random.randn(N,K-1)]
	y = x.dot(b) + sigma_e * np.random.randn(N)

	# estimate parameter vector, errors, sd of errors, and se of parameters
	# (X'X)^-1 feeds both the coefficients and their standard errors, so it is
	# inverted once and reused instead of being recomputed.
	xtx_inv = np.linalg.inv(x.T.dot(x))
	bols = xtx_inv.dot(x.T.dot(y))
	err = y - x.dot(bols)
	# Residual variance using N - K degrees of freedom; shared by sigma and se.
	resid_var = err.dot(err)/(x.shape[0] - x.shape[1])
	sigma_ols = np.sqrt(resid_var)
	se = np.sqrt(resid_var * np.diagonal(xtx_inv))
	# put results together for easy viewing (sigma has no std err, hence the NaN)
	ols_parms = np.r_[bols, sigma_ols]
	ols_se = np.r_[se, np.nan]
	print("\n")
	indexn = ['b'+str(i) for i in range(K)] + ['sigma']
	print(pd.DataFrame(np.c_[ols_parms, ols_se],columns=['estimate', 'std err'],
		index=indexn))
	print("\n\n")

	# Tensor copies of the data and constants read by the log-likelihood functions.
	X = tf.constant(x, dtype=dtype)
	Y = tf.constant(y, dtype=dtype)
	N_ = tf.constant(N, dtype=dtype)
	pi = tf.constant(np.pi, dtype=dtype)

	# NOTE(review): nsamples/nburnin are not read anywhere in the visible code.
	nsamples = tf.constant(1000, dtype=tf.int32)
	nburnin = tf.constant(500, dtype=tf.int32)

	# initialize: random starting beta and a starting sigma of 1
	init = [tf.constant(np.random.randn(K), dtype=dtype), tf.constant(1., dtype=dtype)]

	##
	## Model Log-Likelihood/Posterior
	##
	@tf.function#(experimental_compile=True)
	def ols_loglike(beta, sigma):
		"""Gaussian log-likelihood of the linear model, summed over observations.

		Reads the module-level tensors ``X``, ``Y``, ``N_`` and ``pi``.

		Args:
			beta: coefficient tensor multiplied against ``X`` via ``matvec``.
			sigma: error standard deviation.

		Returns:
			The logged normal pdf summed over the last axis of the residuals.
		"""
		# xb (mu_i for each observation)
		mu = tf.linalg.matvec(X, beta)
		# this is normal pdf logged and summed over all observations
		# (the `*` / `**` operators and the body indentation had been lost in
		# this copy; restored to match the intact definition earlier in the file)
		ll = - (N_/2.)*tf.math.log(2.*pi*sigma**2) -\
			(1./(2.*sigma**2.))*tf.math.reduce_sum((Y-mu)**2., axis=-1)
		return ll

	@tf.function(experimental_compile=True)
	def ols_loglike_XLA(beta, sigma):
		"""XLA-compiled variant of the Gaussian OLS log-likelihood.

		Reads the module-level tensors ``X``, ``Y``, ``N_`` and ``pi``.

		Args:
			beta: coefficient tensor multiplied against ``X`` via ``matvec``.
			sigma: error standard deviation.

		Returns:
			The logged normal pdf summed over the last axis of the residuals.
		"""
		# xb (mu_i for each observation)
		mu = tf.linalg.matvec(X, beta)
		# this is normal pdf logged and summed over all observations
		# (the `*` / `**` operators and the body indentation had been lost in
		# this copy; restored to match the intact definition earlier in the file)
		ll = - (N_/2.)*tf.math.log(2.*pi*sigma**2) -\
			(1./(2.*sigma**2.))*tf.math.reduce_sum((Y-mu)**2., axis=-1)
		return ll

	#
	# This is no xla
	#
	# Query GPUs explicitly: without a device scope the "GPU" timings would run
	# on TensorFlow's default device and report a GPU time on CPU-only machines.
	gpu_devices = tf.config.list_physical_devices('GPU')

	with tf.device('/CPU:0'):
		# Untimed first call so the tf.function is already traced when timing starts.
		ll = ols_loglike(init[0], init[1])
		startt = time.perf_counter()
		ll = ols_loglike(init[0], init[1])
		endt = time.perf_counter()
	print("\n\nLogL calculation in %2.2f MS on CPU"% ((endt - startt)*1000))
	print("\n\n")

	if gpu_devices:
		try:
			with tf.device('/GPU:0'):
				ll = ols_loglike(init[0], init[1])
				startt = time.perf_counter()
				ll = ols_loglike(init[0], init[1])
				endt = time.perf_counter()
			print("\n\nLogL calculation in %2.2f MS on GPU"% ((endt - startt)*1000))
			print("\n\n")
		except (RuntimeError, tf.errors.OpError) as gpu_err:
			# Keep the benchmark best-effort, but say why the GPU run failed.
			print("GPU not available in this python environment")
			print("GPU run failed with: %s" % gpu_err)
	else:
		print("GPU not available in this python environment")

	#
	# This is xla
	#
	with tf.device('/CPU:0'):
		# Untimed first call: tracing/compilation happens here, not in the timing.
		ll = ols_loglike_XLA(init[0], init[1])
		startt = time.perf_counter()
		ll = ols_loglike_XLA(init[0], init[1])
		endt = time.perf_counter()
	print("\n\nLogL calculation in %2.2f MS on CPU (XLA)"% ((endt - startt)*1000))
	print("\n\n")

	if gpu_devices:
		try:
			with tf.device('/GPU:0'):
				ll = ols_loglike_XLA(init[0], init[1])
				startt = time.perf_counter()
				ll = ols_loglike_XLA(init[0], init[1])
				endt = time.perf_counter()
			# Labelled "(XLA)" to match the CPU message and to distinguish it
			# from the non-XLA GPU timing printed earlier.
			print("\n\nLogL calculation in %2.2f MS on GPU (XLA)"% ((endt - startt)*1000))
			print("\n\n")
		except (RuntimeError, tf.errors.OpError) as gpu_err:
			print("GPU not available in this python environment")
			print("GPU run failed with: %s" % gpu_err)
	else:
		print("GPU not available in this python environment")