"""
Testing code for CUDA unified memory in TensorFlow.
Run this script with CUDA unified memory enabled (any --cuda_memory value > 1) by
```
python cuda_unified_test.py --image_size=224 --batch_size=256 --gpu_id=1 --cuda_memory=5
```
"""
import numpy as np
import time
import tensorflow as tf
from tensorflow.python.client import device_lib
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets as slimNet
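# NOTE: this script targets TensorFlow 1.x; tf.contrib (including slim) was
# removed in TensorFlow 2.x, so it will not run unmodified on TF 2.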
tf.logging.set_verbosity(tf.logging.INFO)
FLAGS = tf.app.flags.FLAGS
# Dummy flag so the script can also be launched from environments (e.g.
# Jupyter/IPython) that pass an extra -f argument.
tf.app.flags.DEFINE_string('f', '', 'kernel')
tf.app.flags.DEFINE_string("gpu_id", "0", "index of the GPU to use")
tf.app.flags.DEFINE_string("model", "resnet50", "model to benchmark: resnet50 or googlenet")
tf.app.flags.DEFINE_integer("batch_size", 512, "batch size")
tf.app.flags.DEFINE_integer("image_size", 224, "input image height/width")
tf.app.flags.DEFINE_float("cuda_memory", 1, "GPU memory fraction to pre-allocate; values > 1 enable CUDA unified memory")
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_id
# synthetic data: 2048 random images and one-hot labels over 1000 classes
# (note: randint's upper bound is exclusive, so 256 covers 8-bit pixel values)
x = np.random.randint(0, 256, size=(2048, FLAGS.image_size, FLAGS.image_size, 3))
x = x.astype("float32")
y = np.random.randint(0, 1000, size=2048)
y = tf.keras.utils.to_categorical(y, 1000)
# build the input pipeline
dataset = tf.data.Dataset.from_tensor_slices((x, y))
dataset = dataset.batch(FLAGS.batch_size).filter(
    lambda features, labels: tf.equal(tf.shape(labels)[0], FLAGS.batch_size))
dataset = dataset.repeat(50)
iterator = dataset.make_one_shot_iterator()
inputs, labels = iterator.get_next()
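# iterator.get_next() wires batches straight into the graph (no feed_dict);
# the filter() above drops the final partial batch, so every step sees tensors
# with a fixed batch dimension of FLAGS.batch_size.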
# build model
if FLAGS.model == "resnet50":
    with slim.arg_scope(slimNet.resnet_utils.resnet_arg_scope(batch_norm_decay=0.99)):
        _, layers_dict = slimNet.resnet_v1.resnet_v1_50(inputs, num_classes=1000,
                                                        global_pool=True, is_training=True)
    logits = layers_dict['resnet_v1_50/logits']
    logits = tf.keras.layers.Flatten()(logits)
elif FLAGS.model == "googlenet":
    with slim.arg_scope(slimNet.inception.inception_v1_arg_scope()):
        _, layers_dict = slimNet.inception.inception_v1(inputs, num_classes=1000, is_training=True)
    logits = layers_dict['Logits']
else:
    raise ValueError("Unsupported model: %s (choose resnet50 or googlenet)" % FLAGS.model)
loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
# Create training op.
with tf.name_scope('optimizer'):
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    # Batch-norm moving averages live in UPDATE_OPS and must run with the
    # train step, otherwise the statistics are never updated.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = optimizer.minimize(loss, global_step=tf.train.get_global_step())
# setup tf.ConfigProto()
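# Per the tf.GPUOptions documentation, a per_process_gpu_memory_fraction
# greater than 1.0 backs the allocator with CUDA unified memory, letting the
# process oversubscribe physical GPU memory and use host memory as swap space.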
config = tf.ConfigProto()
if FLAGS.cuda_memory > 1:
    config.gpu_options.per_process_gpu_memory_fraction = FLAGS.cuda_memory
    print("USING CUDA UNIFIED MEMORY")
else:
    print("USING GPU MEMORY ONLY")
# Start session and training
res = []
with tf.train.MonitoredTrainingSession(config=config) as sess:
    for b in range(20):
        t = time.time()
        sess.run(train_step)
        t1 = time.time()
        _loss = sess.run(loss)
        print("Num:", b, ", Loss: ", _loss, ", Elapsed time: ", t1 - t,
              ", Images/sec: ", FLAGS.batch_size / (t1 - t))
        res.append(FLAGS.batch_size / (t1 - t))
# Report mean +- std of images/sec, skipping the first (warm-up) iteration.
print(np.mean(res[1:]), " +- ", np.std(res[1:]))
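
# Example comparison (hypothetical invocations): run once without and once
# with unified memory, then compare the reported images/sec, e.g.
#   python cuda_unified_test.py --batch_size=256 --cuda_memory=1    # GPU memory only
#   python cuda_unified_test.py --batch_size=2048 --cuda_memory=5   # unified memory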