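# Multi-GPU training of a convolutional network on MNIST.
# The model is replicated on each GPU ("tower"), the variables are kept on the CPU,
# and the per-tower gradients are averaged before being applied.
# Assumes TensorFlow 1.x (uses tf.layers, tf.contrib and the tutorials MNIST loader).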

from __future__ import print_function

import numpy as np
import tensorflow as tf
import time

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Parameters
num_gpus = 1
num_steps = 20000
learning_rate = 0.001
batch_size = 1024
display_step = 10

# Network Parameters
num_input = 784  # MNIST data input (img shape: 28*28)
num_classes = 10  # MNIST total classes (0-9 digits)
dropout = 0.75  # Dropout rate passed to tf.layers.dropout (probability to drop a unit)

# Build a convolutional neural network
def conv_net(x, n_classes, dropout, reuse, is_training):
    # Define a scope for reusing the variables
    with tf.variable_scope('ConvNet', reuse=reuse):
        # MNIST data input is a 1-D vector of 784 features (28*28 pixels)
        # Reshape to match picture format [Height x Width x Channel]
        # Tensor input becomes 4-D: [Batch Size, Height, Width, Channel]
        x = tf.reshape(x, shape=[-1, 28, 28, 1])

        # Convolution Layer with 64 filters and a kernel size of 5
        x = tf.layers.conv2d(x, 64, 5, activation=tf.nn.relu)
        # Max Pooling (down-sampling) with strides of 2 and kernel size of 2
        x = tf.layers.max_pooling2d(x, 2, 2)

        # Convolution Layer with 256 filters and a kernel size of 3
        x = tf.layers.conv2d(x, 256, 3, activation=tf.nn.relu)
        # Convolution Layer with 512 filters and a kernel size of 3
        x = tf.layers.conv2d(x, 512, 3, activation=tf.nn.relu)
        # Max Pooling (down-sampling) with strides of 2 and kernel size of 2
        x = tf.layers.max_pooling2d(x, 2, 2)

        # Flatten the data to a 1-D vector for the fully connected layer
        x = tf.contrib.layers.flatten(x)

        # Fully connected layer
        x = tf.layers.dense(x, 2048)
        # Apply Dropout (if is_training is False, dropout is not applied)
        x = tf.layers.dropout(x, rate=dropout, training=is_training)

        # Fully connected layer
        x = tf.layers.dense(x, 1024)
        # Apply Dropout (if is_training is False, dropout is not applied)
        x = tf.layers.dropout(x, rate=dropout, training=is_training)

        # Output layer, class prediction
        out = tf.layers.dense(x, n_classes)
        # Because the 'softmax_cross_entropy_with_logits' loss already applies
        # softmax, we only apply softmax to the testing network
        out = tf.nn.softmax(out) if not is_training else out

    return out
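
# With the default 'valid' padding and stride 1 used above, the feature-map shapes for a
# 28x28 input are: conv5 -> 24x24x64, pool -> 12x12x64, conv3 -> 10x10x256,
# conv3 -> 8x8x512, pool -> 4x4x512, flatten -> 8192, then dense 2048 -> 1024 -> 10.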

# Build the function to average the gradients
def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)
            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(grads, 0)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads
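
# average_gradients expects tower_grads = [tower_0, tower_1, ...], where each tower is
# the list of (gradient, variable) pairs returned by compute_gradients on one GPU, and
# returns a single list of (averaged_gradient, variable) pairs for apply_gradients.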

# By default, all variables will be placed on '/gpu:0'
# So we need a custom device function, to assign all variables to '/cpu:0'
# Note: If GPUs are peered, '/gpu:0' can be a faster option
PS_OPS = ['Variable', 'VariableV2', 'AutoReloadVariable']

def assign_to_device(device, ps_device='/cpu:0'):
    def _assign(op):
        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
        if node_def.op in PS_OPS:
            # ps_device already starts with '/', so return it as-is
            return ps_device
        else:
            return device
    return _assign

# Place all ops on CPU by default
with tf.device('/cpu:0'):
    tower_grads = []
    reuse_vars = False

    # tf Graph input
    X = tf.placeholder(tf.float32, [None, num_input])
    Y = tf.placeholder(tf.float32, [None, num_classes])

    # Loop over all GPUs and construct their own computation graph
    for i in range(num_gpus):
        with tf.device(assign_to_device('/gpu:{}'.format(i), ps_device='/cpu:0')):

            # Split data between GPUs
            _x = X[i * batch_size: (i + 1) * batch_size]
            _y = Y[i * batch_size: (i + 1) * batch_size]

            # Because Dropout has different behavior at training and prediction time, we
            # need to create 2 distinct computation graphs that share the same weights.

            # Create a graph for training
            logits_train = conv_net(_x, num_classes, dropout,
                                    reuse=reuse_vars, is_training=True)
            # Create another graph for testing that reuses the same weights
            logits_test = conv_net(_x, num_classes, dropout,
                                   reuse=True, is_training=False)

            # Define loss and optimizer (with train logits, for dropout to take effect)
            loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                logits=logits_train, labels=_y))
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            grads = optimizer.compute_gradients(loss_op)

            # Only the first GPU computes accuracy
            if i == 0:
                # Evaluate model (with test logits, for dropout to be disabled)
                correct_pred = tf.equal(tf.argmax(logits_test, 1), tf.argmax(_y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

            reuse_vars = True
            tower_grads.append(grads)

    tower_grads = average_gradients(tower_grads)
    train_op = optimizer.apply_gradients(tower_grads)
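
    # apply_gradients updates the shared variables (kept on '/cpu:0' by
    # assign_to_device) once per step using the tower-averaged gradients.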

    # Initializing the variables
    init = tf.global_variables_initializer()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)
        # Keep training until the maximum number of steps is reached
        for step in range(1, num_steps + 1):
            # Get a batch for each GPU
            batch_x, batch_y = mnist.train.next_batch(batch_size * num_gpus)
            # Run optimization op (backprop)
            ts = time.time()
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
            te = time.time() - ts
            if step % display_step == 0 or step == 1:
                # Calculate batch loss and accuracy
                loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                                     Y: batch_y})
                print("Step " + str(step) + ": Minibatch Loss= " +
                      "{:.4f}".format(loss) + ", Training Accuracy= " +
                      "{:.3f}".format(acc) + ", %i Examples/sec" % int(len(batch_x) / te))
        print("Optimization Finished!")

        # Calculate accuracy over the full MNIST test set, in batches
        print("Testing Accuracy:",
              np.mean([sess.run(accuracy, feed_dict={X: mnist.test.images[i:i + batch_size],
                                                     Y: mnist.test.labels[i:i + batch_size]})
                       for i in range(0, len(mnist.test.images), batch_size)]))