Created
October 10, 2018 02:57
-
-
Save feevos/abd71e2ad467286794dc3061768bb790 to your computer and use it in GitHub Desktop.
Example of multi-gpu accuracy evaluation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from __future__ import print_function\n", | |
"import numpy as np\n", | |
"import mxnet as mx\n", | |
"from mxnet import nd, autograd, gluon\n", | |
"from time import time\n", | |
"import warnings\n", | |
"warnings.filterwarnings('ignore')\n", | |
"mx.random.seed(1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"batch_size = 256\n", | |
"num_inputs = 784\n", | |
"num_outputs = 10\n", | |
"num_gpus = 1\n", | |
"learning_rate = .1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ctx = [mx.gpu(i) for i in range(num_gpus)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def transform(data, label):\n", | |
" return nd.transpose(data.astype(np.float32), (2,0,1))/255, label.astype(np.float32)\n", | |
"\n", | |
"train_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=True, transform=transform),\n", | |
" batch_size, shuffle=True, num_workers=4,last_batch='discard')\n", | |
"test_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=False, transform=transform),\n", | |
" batch_size, shuffle=False, num_workers=4,last_batch='discard')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"num_fc = 512\n", | |
"net = gluon.nn.Sequential()\n", | |
"with net.name_scope():\n", | |
" net.add(gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'))\n", | |
" net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))\n", | |
" net.add(gluon.nn.Conv2D(channels=50, kernel_size=5, activation='relu'))\n", | |
" net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))\n", | |
" # The Flatten layer collapses all axis, except the first one, into one axis.\n", | |
" net.add(gluon.nn.Flatten())\n", | |
" net.add(gluon.nn.Dense(num_fc, activation=\"relu\"))\n", | |
" net.add(gluon.nn.Dense(num_outputs))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"net.initialize(mx.init.Xavier(magnitude=2.24), force_reinit=True, ctx=ctx)\n", | |
"net.hybridize(static_alloc=True,static_shape=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': learning_rate})" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# train_data_l and test_data_l are redundant since you've defined your dataloader objects above" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Load training data into GPUs, each data_l contains arrays deployed to gpu1/2/3/4\n", | |
"# there will be 235 loop iterations\n", | |
"# train_data_l = []\n", | |
"# train_label_l = []\n", | |
"# for data,label in train_data:\n", | |
"# train_data_l.append(gluon.utils.split_and_load(data, ctx))\n", | |
"# train_label_l.append(gluon.utils.split_and_load(label, ctx))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Load test data into GPUs\n", | |
"# test_data_l = []\n", | |
"# test_label_l = []\n", | |
"# for data,label in test_data:\n", | |
"# test_data_l.append(gluon.utils.split_and_load(data, ctx))\n", | |
"# test_label_l.append(gluon.utils.split_and_load(label, ctx))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"data_l - List with 235 elements, each element of data_l is\n", | |
"List of 4 elements, each of these 4 elems is\n", | |
"NDArray of shape: (64, 1, 28, 28)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Default accuracy function (this only works on one GPU and won't work for ctx = [gpu(0), gpu(1),])\n", | |
"def evaluate_accuracy(data_iterator, net):\n", | |
" acc = mx.metric.Accuracy()\n", | |
" for i, (data, label) in enumerate(data_iterator):\n", | |
" data = data.as_in_context(ctx)\n", | |
" label = label.as_in_context(ctx)\n", | |
" output = net(data)\n", | |
" predictions = nd.argmax(output, axis=1)\n", | |
" acc.update(preds=predictions, labels=label)\n", | |
" return acc.get()[1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Suggestion by feevos: runs\n", | |
"def eval_acc_feevos1(net, _data_generator):\n", | |
" acc = mx.metric.Accuracy() # Single accuracy \n", | |
" for i, (tdata, tlabel) in enumerate(_data_generator):\n", | |
" data = tdata.as_in_context(mx.gpu(0))\n", | |
" label = nd.array(tlabel) # keep this in cpu context, since this is already done inside the definition of Accuracy\n", | |
" pred = nd.argmax(net(data),axis=1).as_in_context(mx.cpu())\n", | |
" acc.update(preds=pred,labels=label)\n", | |
" return (acc.get()[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Suggestion by feevos: runs\n", | |
"def eval_acc_feevos2(net, _data_generator):\n", | |
" acc = mx.metric.Accuracy() # Single accuracy \n", | |
" for i, (tdata, tlabel) in enumerate(_data_generator):\n", | |
" data = gluon.utils.split_and_load(tdata, ctx)\n", | |
" label = nd.array(tlabel) # keep this in cpu context, since this is already done inside the definition of Accuracy \n", | |
" # Perform inference on each separate GPU \n", | |
" pred = [nd.argmax(net(X),axis=1).as_in_context(mx.cpu()) for X in data]\n", | |
" pred = nd.concat(*pred,dim=0) # Collect results\n", | |
" \n", | |
" acc.update(preds=pred,labels=label) # update single accuracy\n", | |
"\n", | |
" return (acc.get()[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# This works, but it is ugly, slow, and requires loading the labels onto the GPUs, which is redundant!\n", | |
"# As we see below, the accuracy calculation adds ~20 seconds to the epoch time\n", | |
"# See more at: https://discuss.mxnet.io/t/evaluate-accuracy-on-multi-gpu-machine/1972\n", | |
"def eval_acc(net, data_l, label_l):\n", | |
" acc = [mx.metric.Accuracy() for i in range(num_gpus)]\n", | |
" for i, (data, label) in enumerate(zip(data_l, label_l)): # loop on 235 batches\n", | |
" D=[data[n].as_in_context(mx.gpu(n)) for n in range(0,num_gpus)]\n", | |
" L=[label[n].as_in_context(mx.gpu(n)) for n in range(0,num_gpus)]\n", | |
" P = [nd.argmax(net(d), axis=1) for d in D]\n", | |
" [a.update(preds=p, labels=l) for p, a, l in zip(P, acc, L)]\n", | |
" return sum([a.get()[1] for a in acc])/num_gpus" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Epoch 0: Loss: 0.103, train_accuracy 0.977, test_accuracy 0.979, Time 3.7 sec\n", | |
"Epoch 1: Loss: 0.071, train_accuracy 0.977, test_accuracy 0.979, Time 2.9 sec\n", | |
"Epoch 2: Loss: 0.057, train_accuracy 0.977, test_accuracy 0.979, Time 2.9 sec\n", | |
"Epoch 3: Loss: 0.049, train_accuracy 0.977, test_accuracy 0.979, Time 3.2 sec\n", | |
"Epoch 4: Loss: 0.042, train_accuracy 0.977, test_accuracy 0.979, Time 3.1 sec\n", | |
"Epoch 5: Loss: 0.036, train_accuracy 0.992, test_accuracy 0.990, Time 3.3 sec\n", | |
"Epoch 6: Loss: 0.031, train_accuracy 0.992, test_accuracy 0.990, Time 2.6 sec\n", | |
"Epoch 7: Loss: 0.028, train_accuracy 0.992, test_accuracy 0.990, Time 2.7 sec\n", | |
"Epoch 8: Loss: 0.025, train_accuracy 0.992, test_accuracy 0.990, Time 2.6 sec\n", | |
"Epoch 9: Loss: 0.023, train_accuracy 0.992, test_accuracy 0.990, Time 2.7 sec\n", | |
"Epoch 10: Loss: 0.021, train_accuracy 0.995, test_accuracy 0.990, Time 3.4 sec\n" | |
] | |
} | |
], | |
"source": [ | |
"epochs = 11\n", | |
"smoothing_constant = .01\n", | |
"test_acc = train_acc = 0\n", | |
"\n", | |
"for e in range(epochs):\n", | |
" train_loss = 0.\n", | |
" tic = time()\n", | |
" c=1\n", | |
" for data, label in train_data: # read the batch (batch_size rows) from train_data, see batch_size in DataLoader\n", | |
" data_list = gluon.utils.split_and_load(data, ctx) # split batch_size into num_gpu devices\n", | |
" label_list = gluon.utils.split_and_load(label, ctx)\n", | |
"\n", | |
" with autograd.record():\n", | |
" losses = [softmax_cross_entropy(net(X), y)\n", | |
" for X, y in zip(data_list, label_list)]\n", | |
" for l in losses:\n", | |
" l.backward()\n", | |
"\n", | |
" trainer.step(batch_size)\n", | |
" # Sum losses over all devices\n", | |
" train_loss += sum([l.sum().asscalar() for l in losses])\n", | |
" \n", | |
" if (e % 5 == 0): # calculate accuracy every 5th epoch\n", | |
" test_acc = eval_acc(net, test_data_l, test_label_l) #eval_acc_cpu(net, test_data_l, test_label_l)\n", | |
" train_acc = eval_acc(net, train_data_l, train_label_l) #eval_acc_cpu(net, train_data_l, train_label_l)\n", | |
" \n", | |
" print(\"Epoch %d: Loss: %.3f, train_accuracy %.3f, test_accuracy %.3f, Time %.1f sec\" % \n", | |
" (e, train_loss/len(train_data)/batch_size, train_acc, test_acc, time()-tic))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"net.save_params(\"models/cnn_4gpu_mnist.par\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Epoch 0: Loss: 0.012, train_accuracy 0.992, test_accuracy 0.985, Time 5.4 sec\n", | |
"Epoch 1: Loss: 0.011, train_accuracy 0.992, test_accuracy 0.985, Time 2.5 sec\n", | |
"Epoch 2: Loss: 0.011, train_accuracy 0.992, test_accuracy 0.985, Time 2.8 sec\n", | |
"Epoch 3: Loss: 0.009, train_accuracy 0.992, test_accuracy 0.985, Time 2.5 sec\n", | |
"Epoch 4: Loss: 0.008, train_accuracy 0.992, test_accuracy 0.985, Time 2.6 sec\n", | |
"Epoch 5: Loss: 0.008, train_accuracy 0.998, test_accuracy 0.991, Time 5.4 sec\n", | |
"Epoch 6: Loss: 0.007, train_accuracy 0.998, test_accuracy 0.991, Time 2.6 sec\n" | |
] | |
} | |
], | |
"source": [ | |
"epochs = 7\n", | |
"smoothing_constant = .01\n", | |
"test_acc = train_acc = 0\n", | |
"\n", | |
"for e in range(epochs):\n", | |
" train_loss = 0.\n", | |
" tic = time()\n", | |
" c=1\n", | |
" for data, label in train_data: # read the batch (batch_size rows) from train_data, see batch_size in DataLoader\n", | |
" data_list = gluon.utils.split_and_load(data, ctx) # split batch_size into num_gpu devices\n", | |
" label_list = gluon.utils.split_and_load(label, ctx)\n", | |
"\n", | |
" with autograd.record():\n", | |
" losses = [softmax_cross_entropy(net(X), y)\n", | |
" for X, y in zip(data_list, label_list)]\n", | |
" for l in losses:\n", | |
" l.backward()\n", | |
"\n", | |
" trainer.step(batch_size)\n", | |
" # Sum losses over all devices\n", | |
" train_loss += sum([l.sum().asscalar() for l in losses])\n", | |
" \n", | |
" if (e % 5 == 0): # calculate accuracy every 5th epoch\n", | |
" test_acc = eval_acc_feevos2(net, test_data) #eval_acc_cpu(net, test_data_l, test_label_l)\n", | |
" train_acc = eval_acc_feevos2(net, train_data) #eval_acc_cpu(net, train_data_l, train_label_l)\n", | |
" \n", | |
" print(\"Epoch %d: Loss: %.3f, train_accuracy %.3f, test_accuracy %.3f, Time %.1f sec\" % \n", | |
" (e, train_loss/len(train_data)/batch_size, train_acc, test_acc, time()-tic))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.9901842948717948" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"eval_acc_feevos2(net,test_data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment