@sbodenstein
Created July 27, 2016 23:11
Compare the Torch and MXNet cuDNN RNN APIs: run identical forward/backward passes through cudnn.RNN (Torch) and mx.symbol.RNN (MXNet) with deterministic data, parameters, and initial states, and print checksums of every output and gradient.
-- Torch + cudnn.RNN version (Lua)
require 'cudnn'
require 'cunn'
torch.setdefaulttensortype('torch.FloatTensor')

-- Compute checksums of the outputs and gradients of a cudnn RNN module,
-- using the same deterministic inputs, weights, and initial states as the
-- MXNet script below.
function checkSums(rnn, seqLength, batch, inputDim, hiddenSize, layerNum, bidirectional)
  rnn:reset()
  rnn:resetWeightDescriptor()
  local biDirectionalScale = bidirectional and 2 or 1
  -- Fill data and weights with a ramp in (0, 1]; fill initial states with 0.3.
  local dataSize = inputDim * seqLength * batch
  local data = torch.range(1, dataSize):div(dataSize):resize(seqLength, batch, inputDim):cuda()
  local weightSize = rnn.weight:size(1)
  rnn.weight = torch.range(1, weightSize):div(weightSize):cuda()
  rnn.hiddenInput = torch.CudaTensor(layerNum * biDirectionalScale, batch, hiddenSize):fill(0.3)
  rnn.cellInput = torch.CudaTensor(layerNum * biDirectionalScale, batch, hiddenSize):fill(0.3)
  rnn.gradHiddenOutput = torch.CudaTensor(layerNum * biDirectionalScale, batch, hiddenSize):fill(1)
  rnn.gradCellOutput = torch.CudaTensor(layerNum * biDirectionalScale, batch, hiddenSize):fill(1)
  -- Forward pass, then backward with an all-ones upstream gradient.
  local output = rnn:forward(data)
  local gradOutput = torch.CudaTensor(seqLength, batch, hiddenSize * biDirectionalScale):fill(1)
  rnn:backward(data, gradOutput)
  -- Checksums: sum every output and gradient tensor.
  local check = {
    Outputs = {
      data = torch.sum(output),
      state = torch.sum(rnn.hiddenOutput),
      state_cell = torch.sum(rnn.cellOutput)
    },
    Gradients = {
      data = torch.sum(rnn.gradInput),
      state = torch.sum(rnn.gradHiddenInput),
      state_cell = torch.sum(rnn.gradCellInput),
      parameters = torch.sum(rnn.gradWeight)
    }
  }
  return check
end
-- Run tests with the same settings as the MXNet script below
batch = 5
seqLength = 9
inputDim = 7
hiddenSize = 5
layerNum = 3

print("GRU ################")
rnn = cudnn.GRU(inputDim, hiddenSize, layerNum, false)
rnn.mode = 'CUDNN_GRU'
print(checkSums(rnn, seqLength, batch, inputDim, hiddenSize, layerNum, false))

print("Bidirectional GRU ################")
rnn = cudnn.BGRU(inputDim, hiddenSize, layerNum, false)
rnn.mode = 'CUDNN_GRU'
print(checkSums(rnn, seqLength, batch, inputDim, hiddenSize, layerNum, true))

print("LSTM ################")
rnn = cudnn.LSTM(inputDim, hiddenSize, layerNum, false)
rnn.mode = 'CUDNN_LSTM'
print(checkSums(rnn, seqLength, batch, inputDim, hiddenSize, layerNum, false))
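Both scripts initialize the flat cuDNN parameter vector with the same ramp of values, so their checksums are only comparable if the two frameworks agree on that vector's length. The following is a minimal, stand-alone sketch of the length calculation, assuming cuDNN's usual layout of 2*G weight matrices and 2*G bias vectors per layer and direction (G = 3 gates for a GRU, 4 for an LSTM); the helper name cudnn_rnn_param_count is ours, not part of either API.

# Rough sanity check, not part of either script: the flat cuDNN parameter
# vector length, assuming 2*G weight matrices and 2*G bias vectors per layer
# and direction.
def cudnn_rnn_param_count(input_dim, hidden_size, num_layers, mode, bidirectional=False):
    gates = {"rnn_relu": 1, "rnn_tanh": 1, "gru": 3, "lstm": 4}[mode]
    directions = 2 if bidirectional else 1
    total = 0
    for layer in range(num_layers):
        # Layer 0 reads the input data; deeper layers read the previous layer's
        # (possibly direction-concatenated) hidden state.
        in_dim = input_dim if layer == 0 else hidden_size * directions
        per_direction = gates * (hidden_size * in_dim         # input-to-hidden weights
                                 + hidden_size * hidden_size  # hidden-to-hidden weights
                                 + 2 * hidden_size)           # input and recurrent biases
        total += directions * per_direction
    return total

# With the settings used in both scripts (inputDim=7, hiddenSize=5, layerNum=3)
# this should match rnn.weight:size(1) in Torch and param.size in MXNet.
print(cudnn_rnn_param_count(7, 5, 3, "gru"))
print(cudnn_rnn_param_count(7, 5, 3, "gru", bidirectional=True))
print(cudnn_rnn_param_count(7, 5, 3, "lstm"))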
# MXNet + mx.symbol.RNN version (Python)
import mxnet as mx
import numpy as np

def checkSums(rnn_mode, seqLength, batchSize, inputDim, hiddenSize, layerNum, bidirectional):
    # Build a symbolic cuDNN RNN and bind it on the GPU.
    sym = mx.symbol.RNN(
        data=mx.symbol.Variable("data"),
        parameters=mx.symbol.Variable("parameters"),
        state=mx.symbol.Variable("state"),
        mode=rnn_mode,
        state_outputs=True,
        num_layers=layerNum,
        state_size=hiddenSize,
        p=0,
        bidirectional=bidirectional
    )
    ex = sym.simple_bind(data=(seqLength, batchSize, inputDim), ctx=mx.gpu())
    # Create non-trivial input arrays: a ramp in (0, 1] for data and parameters,
    # and a constant 0.3 for the initial state, matching the Torch script above.
    data = ex.arg_dict['data']
    param = ex.arg_dict['parameters']
    state = ex.arg_dict['state']
    data[:].reshape([data.size])[:] = (np.arange(data.size) + 1.0) / data.size
    param[:].reshape([param.size])[:] = (np.arange(param.size) + 1.0) / param.size
    state[:] = 0.3
    # Set the initial cell state if LSTM
    if rnn_mode == "lstm":
        ex.arg_arrays[-1][:] = 0.3
    # Forward pass
    ex.forward(is_train=True)
    # Backward pass with all-ones upstream gradients
    out_grads = [mx.nd.ones(nd.shape, ctx=mx.gpu()) for nd in ex.outputs]
    ex.backward(out_grads=out_grads)
    # Construct checksums of the outputs and gradients
    outputChk = {}
    outputChk["Outputs"] = {
        "data": mx.nd.sum(ex.outputs[0]).asnumpy()[0],
        "state": mx.nd.sum(ex.outputs[1]).asnumpy()[0]
    }
    grads = ex.grad_dict
    outputChk["Gradients"] = {
        "data": mx.nd.sum(grads['data']).asnumpy()[0],
        "parameters": mx.nd.sum(grads['parameters']).asnumpy()[0],
        "state": mx.nd.sum(grads['state']).asnumpy()[0]
    }
    if rnn_mode == "lstm":
        outputChk["Outputs"]["state_cell"] = mx.nd.sum(ex.outputs[-1]).asnumpy()[0]
        outputChk["Gradients"]["state_cell"] = mx.nd.sum(ex.grad_arrays[-1]).asnumpy()[0]
    return outputChk

# Run tests with the same settings as the Torch script
seqLength = 9
batch = 5
inputDim = 7
hiddenSize = 5
layerNum = 3

print("GRU ################")
print(checkSums('gru', seqLength, batch, inputDim, hiddenSize, layerNum, False))
print("Bidirectional GRU ################")
print(checkSums('gru', seqLength, batch, inputDim, hiddenSize, layerNum, True))
print("LSTM ################")
print(checkSums('lstm', seqLength, batch, inputDim, hiddenSize, layerNum, False))
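If the two APIs agree, the Torch and MXNet scripts should print the same checksums for each key. Below is a minimal sketch of a tolerance-based comparison, assuming the printed values have been collected into two nested dicts shaped like the check tables above; compare_checksums is illustrative, not part of the gist.

def compare_checksums(torch_chk, mxnet_chk, rtol=1e-4):
    # Walk the keys shared by both result tables ("Outputs"/"Gradients" ->
    # "data", "state", "state_cell", "parameters") and report any pair whose
    # relative difference exceeds the tolerance.
    mismatches = []
    for group in ("Outputs", "Gradients"):
        for key in sorted(set(torch_chk[group]) & set(mxnet_chk[group])):
            a = float(torch_chk[group][key])
            b = float(mxnet_chk[group][key])
            if abs(a - b) > rtol * max(abs(a), abs(b), 1.0):
                mismatches.append((group, key, a, b))
    return mismatches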