test code for IBM LMS
"""
Testing code for IBM LMS / CUDA Unified Memory
Run this script with CUDA Unified Memory by
```
python LMS_UM_test.py --image_size=224 --batch_size=256 --gpu_id=1 --cuda_memory=5
```
Run this script with IBM Large Model Support by
```
python LMS_UM_test.py --image_size=224 --batch_size=256 --gpu_id=1 --use_lms=True
```
"""
import numpy as np
import time
import os
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets as slimNet
tf.logging.set_verbosity(tf.logging.INFO)
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('f', '', 'kernel')  # dummy flag so the script can also run inside a Jupyter/IPython kernel
tf.app.flags.DEFINE_string("gpu_id", "0", "idx of GPU using")
tf.app.flags.DEFINE_string("model", "resnet50", "select from resnet50, googlenet")
tf.app.flags.DEFINE_integer("batch_size", 512, "Batch size")
tf.app.flags.DEFINE_integer("image_size", 224, "Image size")
tf.app.flags.DEFINE_float("cuda_memory", 1, "pre-alloctaed of CUDA unified memory")
tf.app.flags.DEFINE_bool("use_lms", False, "To Use LMS")
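# pin TensorFlow to the GPU selected by --gpu_id (PCI_BUS_ID makes the index match nvidia-smi)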
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_id
# generate synthetic data (all-zero images; pixel values do not matter for a throughput test)
x = np.random.randint(0, 1, size=(FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size, 3))
x = x.astype("float32")
y = np.random.randint(0, 1000, size=FLAGS.batch_size)
y = tf.keras.utils.to_categorical(y, 1000)
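# labels are one-hot encoded (1000 classes) to match tf.losses.softmax_cross_entropy below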
# define the tf.data.Dataset input pipeline
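# the synthetic batch is fed through placeholders; batch + filter keeps only full
# batches of FLAGS.batch_size, and repeat(500) supplies enough batches for the
# warmup plus the 60 timed steps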
features_placeholder = tf.placeholder(x.dtype, x.shape)
labels_placeholder = tf.placeholder(y.dtype, y.shape)
dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
dataset = dataset.batch(FLAGS.batch_size).filter(lambda features, labels: tf.equal(tf.shape(labels)[0], FLAGS.batch_size))
dataset = dataset.repeat(500)
iterator = dataset.make_initializable_iterator()
inputs, labels = iterator.get_next()
# build model
if FLAGS.model == "resnet50":
    with slim.arg_scope(slimNet.resnet_utils.resnet_arg_scope(batch_norm_decay=0.99)):
        _, layers_dict = slimNet.resnet_v1.resnet_v1_50(inputs, num_classes=1000, global_pool=True, is_training=True)
    logits = layers_dict['resnet_v1_50/logits']
    logits = tf.keras.layers.Flatten()(logits)
elif FLAGS.model == "googlenet":
    with slim.arg_scope(slimNet.inception.inception_v1_arg_scope()):
        _, layers_dict = slimNet.inception.inception_v1(inputs, spatial_squeeze=False, num_classes=1000, is_training=True)
    fmap = layers_dict['Logits']
    output = tf.keras.layers.GlobalAveragePooling2D()(fmap)
    logits = tf.keras.layers.Dense(1000)(output)
else:
    raise ValueError("Unsupported model: %s" % FLAGS.model)
loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
# Create training op.
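# the scope is named 'adam_optimizer' even though the optimizer is Adagrad;
# this scope name is what gets passed to LMS below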
with tf.name_scope('adam_optimizer'):
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = optimizer.minimize(loss, global_step=tf.train.get_global_step())
# import LMS and use
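# when --use_lms=True, LMS rewrites the graph with swap-out/swap-in ops so large
# activations can be staged in host memory instead of exhausting GPU memory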
if FLAGS.use_lms:
    print("USING IBM LARGE MODEL SUPPORT")
    from tensorflow.contrib.lms import LMS
    lms_obj = LMS({'adam_optimizer'})
    lms_obj.run(graph=tf.get_default_graph())
# setup tf.ConfigProto for CUDA Unified memory
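# on builds that support it, setting per_process_gpu_memory_fraction above 1.0
# allocates CUDA unified (managed) memory, oversubscribing GPU memory with host RAM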
config = tf.ConfigProto()
if FLAGS.cuda_memory > 1:
    config.gpu_options.per_process_gpu_memory_fraction = FLAGS.cuda_memory
    print("USING CUDA UNIFIED MEMORY")
res = []
# Start session and training
with tf.train.MonitoredTrainingSession(config=config) as sess:
    sess.run(iterator.initializer, feed_dict={features_placeholder: x,
                                              labels_placeholder: y})
    print("RUNNING WARMUP")
    for w in range(5):
        sess.run(train_step)
    print("WARMUP DONE")
    for b in range(1, 61):
        t = time.time()
        sess.run(train_step)
        t1 = time.time()
        _loss = sess.run(loss)
        if b % 10 == 0:
            print("Num:", b, ", Loss: ", _loss, ", Elapsed time: ", t1 - t, "Images/sec: ", (FLAGS.batch_size / (t1 - t)))
            res.append(FLAGS.batch_size / (t1 - t))
print(np.mean(res), " +- ", np.std(res))