Last active
October 10, 2018 14:55
-
-
Save feevos/0f53ac428a48061e0aadae29cc2d58c7 to your computer and use it in GitHub Desktop.
example of mxnet/gluon accuracy evaluation on single/multi gpu context
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import numpy as np | |
import mxnet as mx | |
from mxnet import nd, autograd, gluon | |
from time import time | |
import warnings | |
warnings.filterwarnings('ignore') | |
mx.random.seed(1) | |
batch_size = 256 | |
num_inputs = 784 | |
num_outputs = 10 | |
num_gpus = 1 # Change this if you have more than one gpu | |
learning_rate = .1 | |
ctx = [mx.gpu(i) for i in range(num_gpus)] | |
# Read the data
def transform(data, label):
    """Convert a HWC uint8 image to CHW float32 scaled to [0, 1]; cast label to float32."""
    return nd.transpose(data.astype(np.float32), (2, 0, 1)) / 255, label.astype(np.float32)

# last_batch='discard' drops the final partial batch so every batch splits
# evenly across the GPU contexts in split_and_load.
train_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=True, transform=transform),
                                   batch_size, shuffle=True, num_workers=4, last_batch='discard')
test_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=False, transform=transform),
                                  batch_size, shuffle=False, num_workers=4, last_batch='discard')
# Create your network: a LeNet-style CNN — two conv/pool stages, then two dense layers.
num_fc = 512
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    net.add(gluon.nn.Conv2D(channels=50, kernel_size=5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    # The Flatten layer collapses all axes, except the first (batch) one, into one axis.
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(num_fc, activation="relu"))
    net.add(gluon.nn.Dense(num_outputs))

# Initialize parameters on every GPU context, then hybridize.
# static_alloc/static_shape let MXNet cache the compiled graph for speed.
net.initialize(mx.init.Xavier(magnitude=2.24), force_reinit=True, ctx=ctx)
net.hybridize(static_alloc=True, static_shape=True)
# Loss function: softmax + cross-entropy fused for numerical stability.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# Trainer (SGD) over all network parameters; step() will average over batch_size.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': learning_rate})
# Evaluate accuracy v1
# Suggestion by feevos: runs
def eval_acc_feevos1(net, _data_generator):
    """Compute classification accuracy of ``net`` over a DataLoader, single-GPU.

    Runs inference on gpu(0) only, moves argmax predictions back to CPU and
    accumulates them in mx.metric.Accuracy. Returns the accuracy as a float.
    """
    acc = mx.metric.Accuracy()  # single accuracy metric, accumulated batch by batch
    for i, (tdata, tlabel) in enumerate(_data_generator):
        data = tdata.as_in_context(mx.gpu(0))
        # Keep labels in cpu context, since Accuracy already works on CPU arrays.
        label = nd.array(tlabel)
        pred = nd.argmax(net(data), axis=1).as_in_context(mx.cpu())
        acc.update(preds=pred, labels=label)
    # acc.get() returns (name, value); return just the value.
    return acc.get()[1]
# Eval accuracy, v2
# Suggestion by feevos: runs
def eval_acc_feevos2(net, _data_generator):
    """Compute classification accuracy of ``net`` over a DataLoader, multi-GPU.

    Each batch is split across the module-level ``ctx`` list, inference runs on
    every device, and the per-device argmax predictions are concatenated on CPU
    before updating mx.metric.Accuracy. Returns the accuracy as a float.
    """
    acc = mx.metric.Accuracy()  # single accuracy metric, accumulated batch by batch
    for i, (tdata, tlabel) in enumerate(_data_generator):
        data = gluon.utils.split_and_load(tdata, ctx)
        # Keep labels in cpu context, since Accuracy already works on CPU arrays.
        label = nd.array(tlabel)
        # Perform inference on each separate GPU, then gather results on CPU.
        pred = [nd.argmax(net(X), axis=1).as_in_context(mx.cpu()) for X in data]
        pred = nd.concat(*pred, dim=0)
        acc.update(preds=pred, labels=label)
    # acc.get() returns (name, value); return just the value.
    return acc.get()[1]
# Run me: data-parallel training loop.
# Bug fixed vs. original: a stray module-level `return (acc.get()[1])` after the
# loop (copy-paste leftover from the eval functions) was a SyntaxError; removed.
epochs = 7
smoothing_constant = .01
test_acc = train_acc = 0  # reported values persist between the every-5th-epoch evaluations
for e in range(epochs):
    train_loss = 0.
    tic = time()
    # Read one batch (batch_size rows) at a time from train_data (see DataLoader above).
    for data, label in train_data:
        # Split the batch evenly across the num_gpus devices.
        data_list = gluon.utils.split_and_load(data, ctx)
        label_list = gluon.utils.split_and_load(label, ctx)
        with autograd.record():
            # One forward pass / loss per device.
            losses = [softmax_cross_entropy(net(X), y)
                      for X, y in zip(data_list, label_list)]
        for l in losses:
            l.backward()
        # Normalize the aggregated gradient by the full batch size.
        trainer.step(batch_size)
        # Sum losses over all devices.
        train_loss += sum([l.sum().asscalar() for l in losses])
    if (e % 5 == 0):  # calculate accuracy every 5th epoch (expensive full passes)
        test_acc = eval_acc_feevos2(net, test_data)
        train_acc = eval_acc_feevos2(net, train_data)
    print("Epoch %d: Loss: %.3f, train_accuracy %.3f, test_accuracy %.3f, Time %.1f sec" %
          (e, train_loss/len(train_data)/batch_size, train_acc, test_acc, time()-tic))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment