@lukemetz
Last active March 21, 2016 19:53
tensorflow speed benchmark: a small residual network replicated across 4 GPUs on synthetic 32x32 inputs, reporting training throughput in examples per second.
# modified from slim: intended as a drop-in for the batch_norm in slim's ops.py,
# so it relies on that module's imports (tf, scopes, variables, moving_averages)
# and its UPDATE_OPS_COLLECTION constant.
@scopes.add_arg_scope
def batch_norm(inputs,
               decay=0.999,
               scale=False,
               epsilon=0.001,
               moving_vars='moving_vars',
               activation=None,
               is_training=True,
               trainable=True,
               restore=True,
               scope=None,
               data_format='NHWC',
               use_transpose=True):
  with tf.variable_op_scope([inputs], scope, 'BatchNorm'):
    # Optionally handle NCHW by transposing to NHWC here and back at the end.
    transpose = False
    if use_transpose and data_format == 'NCHW':
      data_format = 'NHWC'
      transpose = True
      inputs = tf.transpose(inputs, [0, 2, 3, 1])
    inputs_shape = inputs.get_shape()
    if data_format == 'NHWC':
      axis = range(len(inputs_shape) - 1)
      params_shape = inputs_shape[-1:]
    elif data_format == 'NCHW':
      assert len(inputs_shape) == 4
      axis = [0, 2, 3]
      params_shape = (1, inputs_shape[1], 1, 1)
    with scopes.arg_scope([variables.variable], restore=restore):
      # Allocate parameters for the beta and gamma of the normalization.
      beta = variables.variable('beta',
                                params_shape,
                                initializer=tf.zeros_initializer,
                                trainable=trainable)
      if scale:
        gamma = variables.variable('gamma',
                                   params_shape,
                                   initializer=tf.ones,
                                   trainable=trainable)
      else:
        gamma = None
      # Create moving_mean and moving_variance and add them to the moving_vars and
      # GraphKeys.MOVING_AVERAGE_VARIABLES collections.
      with scopes.arg_scope([variables.variable], trainable=False,
                            collections=[
                                moving_vars,
                                tf.GraphKeys.MOVING_AVERAGE_VARIABLES]):
        moving_mean = variables.variable('moving_mean',
                                         params_shape,
                                         initializer=tf.zeros_initializer)
        moving_variance = variables.variable('moving_variance',
                                             params_shape,
                                             initializer=tf.ones)
    if is_training:
      # Calculate the moments based on the individual batch.
      if data_format == 'NCHW':
        mean, variance = tf.nn.moments(inputs, axis, keep_dims=True)
      elif data_format == 'NHWC':
        mean, variance = tf.nn.moments(inputs, axis)
      update_moving_mean = moving_averages.assign_moving_average(
          moving_mean, mean, decay)
      tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
      update_moving_variance = moving_averages.assign_moving_average(
          moving_variance, variance, decay)
      tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)
    else:
      # Just use the moving_mean and moving_variance.
      mean = moving_mean
      variance = moving_variance
    outputs = tf.nn.batch_normalization(
        inputs, mean, variance, beta, gamma, epsilon)
    outputs.set_shape(inputs.get_shape())
    if activation:
      outputs = activation(outputs)
    if transpose:
      outputs = tf.transpose(outputs, [0, 3, 1, 2])
    return outputs
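
# A minimal usage sketch for batch_norm above (assumes a TF 0.x-era runtime and that
# the slim-style scopes/variables helpers it relies on are importable; the names
# x and y are illustrative only):
#   x = tf.placeholder(tf.float32, [None, 32, 32, 16])  # NHWC feature map, 16 channels
#   y = batch_norm(x, scale=True, is_training=True, data_format='NHWC')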
import tensorflow as tf
import numpy as np
import slim
from slim import scopes
import slim.ops
import copy
import time

FLAGS = tf.app.flags.FLAGS

batch_norm_params = {
    'decay': 0.9,
    'scale': True,
    'epsilon': 0.001,
}


@scopes.add_arg_scope
def residual(inp, num_filters_out, last_act=tf.nn.relu, is_training=True):
  with tf.variable_op_scope([inp], None, 'residual'):
    o = slim.ops.conv2d(inp, num_filters_out=num_filters_out, kernel_size=(3, 3),
                        batch_norm_params=batch_norm_params, is_training=is_training)
    o = slim.ops.conv2d(o, num_filters_out=num_filters_out, activation=None, kernel_size=(3, 3),
                        batch_norm_params=batch_norm_params, is_training=is_training)
    return last_act(inp + o)


@scopes.add_arg_scope
def down_residual(inp, num_filters_out, last_act=tf.nn.relu, is_training=True):
  with tf.variable_op_scope([inp], None, 'down_residual'):
    o = slim.ops.conv2d(inp, num_filters_out=num_filters_out, stride=2, kernel_size=(3, 3),
                        batch_norm_params=batch_norm_params, is_training=is_training)
    o = slim.ops.conv2d(o, num_filters_out=num_filters_out, activation=None, kernel_size=(3, 3),
                        batch_norm_params=batch_norm_params, is_training=is_training)
    num_filters_in = inp.get_shape()[-1]
    weights_shape = [3, 3,
                     num_filters_in, num_filters_out]
    weights = slim.variables.variable("weights", shape=weights_shape)
    # Strided projection so the shortcut matches the downsampled output shape.
    proj = tf.nn.conv2d(inp, filter=weights, strides=(1, 2, 2, 1), padding="SAME")
    return last_act(o + proj)


def model(inp, num_labels=500, is_training=True):
  o = slim.ops.conv2d(inp, num_filters_out=16, kernel_size=(3, 3),
                      is_training=is_training, batch_norm_params=batch_norm_params)
  o = residual(o, 16, is_training=is_training)
  o = down_residual(o, 32, is_training=is_training)
  o = residual(o, 32, is_training=is_training)
  o = down_residual(o, 64, is_training=is_training)
  o = residual(o, 64, is_training=is_training)
  o = down_residual(o, 128, is_training=is_training)
  o = residual(o, 128, is_training=is_training)
  avg = slim.ops.avg_pool(o, kernel_size=(4, 4), stride=1)
  flatten = slim.ops.flatten(avg)
  logits = slim.ops.fc(flatten, num_units_out=num_labels, activation=None,
                       is_training=is_training, batch_norm_params=batch_norm_params)
  return flatten, logits


def loss_func(logit, label):
  return tf.nn.sparse_softmax_cross_entropy_with_logits(logit, label)


def tower_loss(inp, labels, num_classes, scope, is_training=True):
  flatten, logits = model(inp, num_labels=num_classes, is_training=is_training)
  l = loss_func(logits, labels)
  return l, logits


def _average_gradients(tower_grads):
  # copied from tensorflow examples
  """Calculate the average gradient for each shared variable across all towers.

  Note that this function provides a synchronization point across all towers.

  Args:
    tower_grads: List of lists of (gradient, variable) tuples. The outer list
      is over individual gradients. The inner list is over the gradient
      calculation for each tower.
  Returns:
    List of pairs of (gradient, variable) where the gradient has been averaged
    across all towers.
  """
  average_grads = []
  for grad_and_vars in zip(*tower_grads):
    # Note that each grad_and_vars looks like the following:
    #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
    grads = []
    for g, _ in grad_and_vars:
      # Add 0 dimension to the gradients to represent the tower.
      expanded_g = tf.expand_dims(g, 0)
      # Append on a 'tower' dimension which we will average over below.
      grads.append(expanded_g)
    # Average over the 'tower' dimension.
    grad = tf.concat(0, grads)
    grad = tf.reduce_mean(grad, 0)
    # Keep in mind that the Variables are redundant because they are shared
    # across towers. So .. we will just return the first tower's pointer to
    # the Variable.
    v = grad_and_vars[0][1]
    grad_and_var = (grad, v)
    average_grads.append(grad_and_var)
  return average_grads
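
# For concreteness, a hedged sketch of the structure _average_gradients expects and
# returns, for a hypothetical run with two towers and two shared variables:
#   tower_grads = [[(g0_gpu0, var0), (g1_gpu0, var1)],   # gradients from tower 0
#                  [(g0_gpu1, var0), (g1_gpu1, var1)]]   # gradients from tower 1
#   _average_gradients(tower_grads)
#   # -> [(mean of g0_gpu0 and g0_gpu1, var0), (mean of g1_gpu0 and g1_gpu1, var1)]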
def train():
  with tf.device("/gpu:0"):
    batch_size = 128
    num_gpu = 4
    num_classes = 500
    summaries = []
    with tf.Graph().as_default(), tf.device('/cpu:0'):
      means = tf.constant(np.array([123.68, 116.779, 103.939],
                                   dtype="float32").reshape((1, 1, 1, 3)))
      global_step = tf.get_variable('global_step', [], tf.int64,
                                    tf.constant_initializer(0), trainable=False)
      lr = tf.train.exponential_decay(0.002,
                                      global_step,
                                      int(1.2e6 / (batch_size * 4)),
                                      0.955,
                                      staircase=True)
      opt = tf.train.AdamOptimizer(lr)
      summaries.append(tf.scalar_summary("lr", lr))

      with tf.name_scope('model_towers') as scope, tf.device("/cpu:0"):
        #images, labels = data.get_inputs(batch_size*num_gpu)
        #images = tf.cast(images, tf.float32)
        # Fake data to just test speeds.
        with tf.device("/cpu:0"):
          images = tf.get_variable("images", [batch_size * num_gpu, 32, 32, 3],
                                   tf.float32, trainable=False)
          labels = tf.get_variable("labels", [batch_size * num_gpu, ], tf.int64,
                                   tf.constant_initializer(0), trainable=False)
        tower_grads = []
        infos = []
        # Build one model replica ("tower") per GPU on the shared variables.
        for i in range(num_gpu):
          b_imgs = images[i * batch_size:(i + 1) * batch_size, :, :, :]
          b_labels = labels[i * batch_size:(i + 1) * batch_size]
          b_imgs -= means
          with tf.device("/gpu:%i" % i):
            with tf.name_scope("Tower_%d" % i):
              loss, logit = tower_loss(b_imgs, b_labels, num_classes, scope)
              top5 = tf.nn.in_top_k(predictions=logit, targets=b_labels, k=5)
              top5 = tf.cast(top5, tf.float32)
              infos.append((tf.reduce_mean(loss), tf.reduce_mean(top5)))
              grads = opt.compute_gradients(loss)
              tower_grads.append(grads)
              tf.get_variable_scope().reuse_variables()

      grads = _average_gradients(tower_grads)
      for grad, var in grads:
        if grad:
          summaries.append(
              tf.histogram_summary(var.op.name + '/gradients', grad))
      with tf.device("/gpu:0"):
        train_op = opt.apply_gradients(grads, global_step=global_step)
      update_ops = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION)

      input_summaries = copy.copy(tf.get_collection(tf.GraphKeys.SUMMARIES))
      summaries.extend(input_summaries)
      summaries = list(set(summaries))

      init = tf.initialize_all_variables()
      sess = tf.Session(config=tf.ConfigProto(
          allow_soft_placement=True,
      ))
      sess.run(init)
      tf.train.start_queue_runners(sess=sess)

      # Time the train op (plus the batch-norm moving-average updates) and report
      # a running average of throughput over the last `rate` steps.
      avgs = []
      rate = 100
      while True:
        tstart = time.time()
        i = sess.run([train_op] + update_ops)[1:3]
        avgs.append(time.time() - tstart)
        avgs = avgs[-rate:]
        print "Examples per second", float(1.0 / np.mean(avgs) * batch_size * num_gpu)


def main(argv=None):
  train()


if __name__ == "__main__":
  tf.app.run()
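
# To run the benchmark (a sketch based on how the script is structured, with assumed
# setup: the slim library on PYTHONPATH, the modified batch_norm above presumably
# swapped into slim's ops.py, and a machine with 4 GPUs; allow_soft_placement should
# let TensorFlow fall back onto the devices that are available):
#   python benchmark.py   # filename is illustrative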