@soumith
Forked from culurciello/gist:5189137
Last active December 19, 2015 20:09
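A small Torch7 benchmark: it times one forward pass of an nn.SpatialConvolution layer (16 input to 32 output feature maps, 10x10 kernel, 192x192 input, batch of 128) on CPU or, optionally, GPU, and reports the achieved throughput in Gops/s.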
#!/usr/bin/env torch
require 'nn'
require 'image'
require 'xlua'
require 'pl'
opt = lapp[[
   -t,--threads  (default 8)      number of threads
   -p,--type     (default float)  float or cuda
   -i,--devid    (default 1)      device ID (if using CUDA)
]]
p = xlua.Profiler()
torch.setnumthreads(opt.threads)
torch.manualSeed(1)
torch.setdefaulttensortype('torch.FloatTensor')
if opt.type == 'cuda' then
   print('==> switching to CUDA')
   require 'cunn'
   cutorch.setDevice(opt.devid)
   print('==> using GPU #' .. cutorch.getDevice())
   -- route any use of the MM variant to the standard convolution module
   nn.SpatialConvolutionMM = nn.SpatialConvolution
end
iH = 192        -- input height
iW = 192        -- input width
fin = 16        -- input feature maps
fout = 32       -- output feature maps
kH = 10         -- kernel height
kW = 10         -- kernel width
batchSize = 128 -- images per batch
-- input:
lena1 = torch.Tensor(batchSize,fin,iH,iW)
-- model to test:
model = nn.SpatialConvolution(fin, fout, kW, kH)
-- copy to GPU if desired:
if opt.type == 'cuda' then
   model = nn.SpatialConvolutionCUDA(fin, fout, kW, kH):cuda()
   -- SpatialConvolutionCUDA uses the cuda-convnet layout: planes x height x width x batch
   lena1 = torch.CudaTensor(fin, iH, iW, batchSize)
end
-- test speed:
p:start('spatialconv')
lena2 = model:forward(lena1)
if opt.type == 'cuda' then cutorch.synchronize() end
p:lap('spatialconv')
p:printAll{}
print('Gops/s:', ( batchSize*fin*fout*kH*kW*((iH-kH)+1)*((iW-kW)+1)*2 ) / p:cpu('spatialconv') / 1e9 ) -- 2 ops per output element: multiply + accumulate
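To run the benchmark, invoke the script through the Torch launcher named in the shebang (the filename below is hypothetical):

   torch conv_benchmark.lua -t 8 -p float
   torch conv_benchmark.lua -p cuda -i 1

With the default sizes above, the output plane is (192 - 10 + 1) = 183 on each side, so one forward pass performs 128 * 16 * 32 * 10 * 10 * 183 * 183 * 2 ≈ 4.39e11 operations; a pass that takes 1 second therefore corresponds to roughly 439 Gops/s.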