Skip to content

Instantly share code, notes, and snippets.

@nagadomi
Created June 22, 2016 01:12
Show Gist options
  • Save nagadomi/0be1f35629e3785fdba1914a1fe58fc4 to your computer and use it in GitHub Desktop.
Save nagadomi/0be1f35629e3785fdba1914a1fe58fc4 to your computer and use it in GitHub Desktop.
require 'cutorch'
require 'cunn'
require 'cudnn'
require 'sys'
-- Benchmark of cuDNN forward-convolution algorithms:
-- IMPLICIT_GEMM vs. IMPLICIT_PRECOMP_GEMM vs. WINOGRAD.
-- Requires cuDNN v5 and the R5 branch of cudnn.torch.
--- Build the benchmark network: a plain stack of 3x3 cuDNN convolutions.
-- Layout: conv(ch -> 64) + ReLU, then 17 x (conv(64 -> 64) + ReLU), then a
-- final conv(64 -> 1) with no activation, i.e. 19 convolutions and 37 modules.
-- Every convolution is built with the same trailing arguments
-- (3, 3, 1, 1, 1, 1): per the nn.SpatialConvolution signature these are
-- kW, kH, dW, dH, padW, padH (3x3 kernel, stride 1, padding 1).
-- @param ch number of input planes of the first convolution
-- @return the assembled nn.Sequential container
function create_model(ch)
  local WIDTH = 64          -- planes of every hidden layer
  local HIDDEN_PAIRS = 17   -- 64 -> 64 conv/ReLU pairs after the first pair

  local model = nn.Sequential()
  local n_in = ch
  -- First pair maps ch -> WIDTH; the remaining pairs map WIDTH -> WIDTH.
  for _ = 1, HIDDEN_PAIRS + 1 do
    model:add(cudnn.SpatialConvolution(n_in, WIDTH, 3, 3, 1, 1, 1, 1))
    model:add(cudnn.ReLU())
    n_in = WIDTH
  end
  -- Output layer: collapse to a single plane, no activation.
  model:add(cudnn.SpatialConvolution(WIDTH, 1, 3, 3, 1, 1, 1, 1))
  return model
end
--- Force an explicit algorithm on every cuDNN convolution of a model.
-- Each module returned by model:findModules("cudnn.SpatialConvolution") is
-- updated in place: its own setMode() is called with the entries of `modes`
-- as positional arguments, and its workspace_limit field is overwritten.
-- @param model container exposing findModules()
-- @param modes array of mode names passed through to each module's setMode()
function setMode(model, modes)
  -- table.unpack only exists on Lua 5.2+ (or LuaJIT built with 5.2 compat);
  -- stock Lua 5.1 / LuaJIT expose the same function as the global unpack.
  local unpack_args = table.unpack or unpack

  local convs = model:findModules("cudnn.SpatialConvolution")
  for i = 1, #convs do
    local conv = convs[i]
    conv:setMode(unpack_args(modes))
    -- NOTE(review): the factor 64 is unexplained in the original; presumably
    -- a per-layer workspace budget scaled by the weight count -- confirm
    -- the unit expected by cudnn.torch.
    conv.workspace_limit = (conv.nInputPlane * conv.nOutputPlane) * 64
    --conv.fastest = true
  end
end
-- Global cudnn.torch switches.
-- NOTE(review): every convolution gets an explicit forward algorithm via
-- setMode() below, so benchmark/fastest presumably do not affect the forward
-- pass measured here -- confirm against cudnn.torch R5.
cudnn.benchmark = false
cudnn.fastest = true
cudnn.verbose = true

CH = 1             -- input planes of the benchmarked network
TRIES = 10         -- timed forward passes per algorithm
local WARMUP = 10  -- untimed forward passes run before each measurement

--- Warm up and time TRIES forward passes of one model, then print the result.
-- Prints: label, mean seconds per forward pass, and the accumulated output sum
-- (the sum lets the reader check that all algorithms compute the same values).
-- @param label name printed in the first column
-- @param model network to run
-- @param input CUDA tensor reused (and refilled) for every pass
-- @return total elapsed seconds of the timed loop
local function benchmark(label, model, input)
  -- Warm up THIS model: the first forward calls on a fresh module include
  -- one-off setup work that must not be counted in the measurement.
  for i = 1, WARMUP do
    model:forward(input:fill(i)):sum()
  end

  -- Make sure no queued GPU work leaks into or out of the timed region.
  cutorch.synchronize()
  local start = sys.clock()
  local sum = 0
  for i = 1, TRIES do
    sum = sum + model:forward(input:fill(i)):sum()
  end
  cutorch.synchronize()
  local elapsed = sys.clock() - start

  print(label, elapsed / TRIES, sum)
  return elapsed
end

-- Printed label -> cuDNN forward algorithm under test, in run order.
local runs = {
  { "IMPLICIT_GEMM", 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM' },
  { "IMPLICIT_PRECOMP_GEMM", 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM' },
  { "WINOGRAD", 'CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD' },
}

-- One set of weights; every run uses a clone so all algorithms see identical
-- parameters and the printed sums are comparable.
local base = create_model(CH):cuda()
local input = torch.Tensor(1, CH, 512, 512):cuda()

for _, run in ipairs(runs) do
  local model = base:clone()
  setMode(model, { run[2] })
  benchmark(run[1], model, input)
end
@nagadomi
Copy link
Author

IMPLICIT_GEMM   0.26035809516907    71863.444335938 
IMPLICIT_PRECOMP_GEMM   0.13740830421448    71863.444335938 
WINOGRAD    0.090363383293152   71863.450195312 

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment