@nagadomi
Last active March 21, 2017 10:15
require 'cutorch'
require 'cunn'
require 'cudnn'
require 'sys'

-- WINOGRAD benchmark
-- requires: cuDNN v5, cudnn.torch R5 branch

-- simple 3x3 conv model
function create_model(ch)
    local model = nn.Sequential()
    model:add(cudnn.SpatialConvolution(ch, 64, 3, 3, 1, 1, 1, 1))
    model:add(cudnn.ReLU())
    model:add(cudnn.SpatialConvolution(64, 64, 3, 3, 1, 1, 1, 1))
    model:add(cudnn.ReLU())
    return model
end

-- force a specific cuDNN forward algorithm on every conv layer
function setMode(model, modes)
    local modules = model:findModules("cudnn.SpatialConvolution")
    for i = 1, #modules do
        modules[i]:setMode(table.unpack(modes))
        modules[i].workspace_limit = (modules[i].nInputPlane * modules[i].nOutputPlane) * 64
        --modules[i].fastest = true
    end
end

cudnn.benchmark = false -- don't use auto tuning
cudnn.fastest = true
cudnn.verbose = true

model1 = create_model(64):cuda()
model2 = model1:clone()
setMode(model1, {'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM'})
setMode(model2, {'CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD'}) -- use WINOGRAD

input = torch.Tensor(1, 64, 512, 512):cuda()

-- warm up
sum = 0
for i = 1, 10 do
    sum = sum + model1:forward(input:fill(i)):sum()
end

-- test model1 (IMPLICIT_GEMM)
t = sys.clock()
sum = 0
for i = 1, 10 do
    sum = sum + model1:forward(input:fill(i)):sum()
end
t1 = sys.clock() - t
print(t1, sum)

-- test model2 (WINOGRAD)
t = sys.clock()
sum = 0
for i = 1, 10 do
    sum = sum + model2:forward(input:fill(i)):sum()
end
t2 = sys.clock() - t
print(t2, sum)

print(("%.2f x faster!"):format(t1 / t2))
@nagadomi (Author) commented:

Output (elapsed seconds, output sum); the first line is model1 (IMPLICIT_GEMM), the second is model2 (WINOGRAD):
0.30708813667297 112713675.625
0.11358094215393 112713691.875
2.70 x faster!

ghost commented Mar 21, 2017

Can I get Winograd code in C++?

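cuDNN itself is a C library, so the same thing can be done from C++ by passing CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD to cudnnConvolutionForward. Below is a rough, untested sketch against the cuDNN v5 C API for the same 1x64x512x512 input and 64-channel 3x3 convolution used in the Lua benchmark above; the buffer handling, shapes, and error-check macro are illustrative assumptions, not code from this gist.

// Sketch: force the WINOGRAD forward algorithm for one 3x3, pad 1, stride 1 convolution.
#include <cudnn.h>
#include <cuda_runtime.h>
#include <cassert>
#include <cstdio>

#define CHECK_CUDNN(call) do { cudnnStatus_t s = (call); assert(s == CUDNN_STATUS_SUCCESS); } while (0)

int main() {
    const int n = 1, c = 64, h = 512, w = 512, k = 64; // same shapes as the Lua benchmark

    cudnnHandle_t handle;
    CHECK_CUDNN(cudnnCreate(&handle));

    cudnnTensorDescriptor_t xDesc, yDesc;
    cudnnFilterDescriptor_t wDesc;
    cudnnConvolutionDescriptor_t convDesc;
    CHECK_CUDNN(cudnnCreateTensorDescriptor(&xDesc));
    CHECK_CUDNN(cudnnCreateTensorDescriptor(&yDesc));
    CHECK_CUDNN(cudnnCreateFilterDescriptor(&wDesc));
    CHECK_CUDNN(cudnnCreateConvolutionDescriptor(&convDesc));

    CHECK_CUDNN(cudnnSetTensor4dDescriptor(xDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n, c, h, w));
    // pad 1, stride 1, 3x3 keeps the spatial size, so the output is n x k x h x w
    CHECK_CUDNN(cudnnSetTensor4dDescriptor(yDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n, k, h, w));
    CHECK_CUDNN(cudnnSetFilter4dDescriptor(wDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, k, c, 3, 3));
    // cuDNN v5 signature; cuDNN 6+ adds a compute-type argument at the end
    CHECK_CUDNN(cudnnSetConvolution2dDescriptor(convDesc, 1, 1, 1, 1, 1, 1, CUDNN_CROSS_CORRELATION));

    // Pick WINOGRAD explicitly instead of letting cuDNN choose an algorithm.
    cudnnConvolutionFwdAlgo_t algo = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD;

    size_t wsSize = 0;
    CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(handle, xDesc, wDesc, convDesc, yDesc, algo, &wsSize));

    float *x, *wgt, *y;
    void *workspace = nullptr;
    cudaMalloc((void**)&x,   sizeof(float) * n * c * h * w);
    cudaMalloc((void**)&wgt, sizeof(float) * k * c * 3 * 3);
    cudaMalloc((void**)&y,   sizeof(float) * n * k * h * w);
    if (wsSize > 0) cudaMalloc(&workspace, wsSize);

    const float alpha = 1.0f, beta = 0.0f;
    CHECK_CUDNN(cudnnConvolutionForward(handle, &alpha, xDesc, x, wDesc, wgt, convDesc,
                                        algo, workspace, wsSize, &beta, yDesc, y));
    printf("WINOGRAD forward done, workspace = %zu bytes\n", wsSize);
    // descriptor/memory cleanup omitted for brevity
    return 0;
}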