Skip to content

Instantly share code, notes, and snippets.

@zer0n
Last active March 1, 2016 03:09
Show Gist options
Save zer0n/472ac043e932aad351be to your computer and use it in GitHub Desktop.
require 'sys';
require 'bit';
require 'cunn';
require 'cudnn';
require 'optim';
-- Use 32-bit floats on the CPU side (matches CUDA tensors' precision).
torch.setdefaulttensortype('torch.FloatTensor')
-- Benchmark configuration.
local steps = 100 -- number of runs
local Linear = nn.Linear
-- NOTE(review): nn.Sigmoid takes no constructor argument; the 'true'
-- (in-place) flag passed below suggests nn.ReLU may have been intended
-- here -- confirm against the benchmark this gist was written for.
local Transfer = nn.Sigmoid
local isize = 512   -- input feature dimension
local hsize = 2048  -- hidden layer width
local osize = 10000 -- number of output classes
-- Network definition: a 5-layer MLP, 512 -> 2048 x4 -> 10000 log-probs.
local mlp = nn.Sequential()
mlp:add(Linear(isize,hsize)):add(Transfer(true)) -- hidden layer 1
mlp:add(Linear(hsize,hsize)):add(Transfer(true)) -- hidden layer 2
mlp:add(Linear(hsize,hsize)):add(Transfer(true)) -- hidden layer 3
mlp:add(Linear(hsize,hsize)):add(Transfer(true)) -- hidden layer 4
mlp:add(Linear(hsize,osize)):add(cudnn.LogSoftMax()) -- output layer
-- Fake data: one fixed random batch, reused for every timed step.
local bsize = 8192
local inputCPU = torch.randn(bsize,isize)
local input = torch.CudaTensor(inputCPU:size()) -- GPU-side buffer; filled inside the timing loop
-- Class labels for ClassNLLCriterion must lie in 1..osize (the output
-- layer has osize units). The original drew from 1..bsize (8192), which
-- only covered a subset of the 10000 classes.
local target = torch.IntTensor(bsize):random(1,osize):cuda()
-- Benchmark training throughput with nGPU = 1, 2 and 4 (powers of two).
for k=0,2 do
  local nGPU = bit.lshift(1,k) -- 'local' added: was an accidental global
  local model
  if nGPU > 1 then
    -- Split each batch across GPUs along dimension 1 (the batch dim);
    -- worker threads must load cunn/cudnn to drive their replicas.
    model = nn.DataParallelTable(1, true, true):threads(function() require 'cunn'; require 'cudnn'; end)
    model:add(mlp:cuda(), torch.range(1,nGPU):totable())
  else
    model = mlp:cuda()
  end
  -- NOTE(review): the same 'mlp' instance is reused across iterations, so
  -- parameters carry over between the 1/2/4-GPU runs -- acceptable for a
  -- throughput benchmark, but the runs are not statistically independent.
  -- optimizer declarations
  local criterion = nn.ClassNLLCriterion():cuda()
  local parameters, gradParameters = model:getParameters()
  local optimState = { learningRate = 0.01 }
  -- Closure for optim.sgd: one forward/backward pass over the fixed batch.
  -- Declared 'local' (was an accidental global) and hoisted out of the
  -- timed loop -- it is loop-invariant, so there is no need to allocate a
  -- fresh closure on every step.
  local feval = function(x)
    model:zeroGradParameters()
    local output = model:forward(input)
    local err = criterion:forward(output, target)
    local gradOutput = criterion:backward(output, target)
    model:backward(input, gradOutput) -- gradInput not needed; unused local removed
    return err, gradParameters
  end
  collectgarbage()
  sys.tic()
  for t = 1, steps do
    input:copy(inputCPU) -- transfer data to GPU memory
    optim.sgd(feval, parameters, optimState)
    cutorch.synchronize() -- wait for GPU work so the timing is honest
  end
  local elapsed = sys.toc()
  print(string.format("%d GPUs: %0.0f samples per sec", nGPU, steps * bsize / elapsed))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment