Skip to content

Instantly share code, notes, and snippets.

@czotti
Last active December 8, 2018 14:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save czotti/4dc532927ba32481e5b538b99eff2894 to your computer and use it in GitHub Desktop.
Save czotti/4dc532927ba32481e5b538b99eff2894 to your computer and use it in GitHub Desktop.
Issues with speed pytorch 0.4.1 and 1.0
************************************** TENSOR SIZE (1, 2, 256, 256, 256) **************************************
Iter 1/5, loss: 1.1791579723358154
----------------------------------- --------------- --------------- --------------- --------------- ---------------
Name CPU time CUDA time Calls CPU total CUDA total
----------------------------------- --------------- --------------- --------------- --------------- ---------------
convolution 1993651.502us 1992817.026us 4 7974606.009us 7971268.105us
add 1618.376us 6155.640us 4 6473.504us 24622.559us
torch::autograd::AccumulateGrad 6500.853us 7.487us 6 39005.119us 44.922us
is_floating_point 9.718us 7.812us 1 9.718us 7.812us
cudnn_convolution_backward 14125154.892us 15425690.918us 4 56500619.569us 61702763.672us
EluBackward 921.245us 6175.781us 2 1842.491us 12351.562us
mul_ 18.442us 17.578us 8 147.535us 140.625us
elu_ 591.556us 4349.609us 2 1183.112us 8699.219us
add_ 22.765us 26.042us 12 273.183us 312.500us
torch::autograd::CopyBackwards 2655614.432us 40296.875us 2 5311228.865us 80593.750us
mean 5391.290us 639.648us 1 5391.290us 639.648us
mul 333.253us 1546.875us 3 999.759us 4640.625us
torch::autograd::GraphRoot 22.302us 7.324us 1 22.302us 7.324us
PowBackward0 1596.300us 4652.832us 1 1596.300us 4652.832us
cudnn_convolution 1993596.314us 1992796.013us 4 7974385.254us 7971184.053us
div 106.370us 744.629us 1 106.370us 744.629us
AddBackward0 4.344us 0.000us 2 8.687us 0.000us
pow 2354.389us 1340.088us 2 4708.778us 2680.176us
item 62.608us 62.500us 1 62.608us 62.500us
neg 47.268us 1349.609us 1 47.268us 1349.609us
conv3d 1993968.818us 1993127.823us 4 7975875.273us 7972511.291us
_local_scalar_dense 46.377us 46.875us 1 46.377us 46.875us
SubBackward0 110.337us 2691.895us 1 110.337us 2691.895us
elu_backward 898.719us 6172.852us 2 1797.438us 12345.703us
addcdiv_ 21.728us 23.438us 4 86.912us 93.750us
expand 50.636us 44.922us 1 50.636us 44.922us
empty 14.302us 5.584us 6 85.811us 33.503us
MeanBackward1 3023.451us 3645.508us 1 3023.451us 3645.508us
CudnnConvolutionBackward 14125176.846us 15425693.115us 4 56500707.384us 61702772.461us
_convolution 1993640.512us 1992810.689us 4 7974562.046us 7971242.754us
contiguous 5.430us 2.779us 4 21.721us 11.115us
addcmul_ 26.447us 27.344us 4 105.788us 109.375us
sqrt 328.292us 330.078us 4 1313.167us 1320.312us
sub 95.299us 1945.801us 1 95.299us 1945.801us
Iter 2/5, loss: 1.1788948774337769
----------------------------------- --------------- --------------- --------------- --------------- ---------------
Name CPU time CUDA time Calls CPU total CUDA total
----------------------------------- --------------- --------------- --------------- --------------- ---------------
convolution 699.150us 72173.980us 4 2796.602us 288695.921us
add 316.585us 6046.371us 4 1266.338us 24185.486us
torch::autograd::AccumulateGrad 12638.065us 12623.779us 6 75828.392us 75742.676us
is_floating_point 4.729us 3.906us 1 4.729us 3.906us
cudnn_convolution_backward 299.305us 5588712.349us 4 1197.218us 22354849.396us
EluBackward 47.975us 6209.229us 2 95.949us 12418.457us
mul_ 19.369us 19.043us 8 154.951us 152.344us
elu_ 25.809us 4032.509us 2 51.618us 8065.018us
detach_ 4.103us 3.968us 4 16.412us 15.872us
add_ 24.862us 27.832us 12 298.341us 333.984us
torch::autograd::CopyBackwards 11391234.402us 44634.766us 2 22782468.804us 89269.531us
mean 46.818us 633.850us 1 46.818us 633.850us
mul 199.558us 1555.878us 3 598.675us 4667.633us
torch::autograd::GraphRoot 8.897us 1.038us 1 8.897us 1.038us
PowBackward0 639.862us 4645.874us 1 639.862us 4645.874us
cudnn_convolution 664.946us 72155.972us 4 2659.785us 288623.887us
div 81.333us 689.148us 1 81.333us 689.148us
AddBackward0 3.917us 1.465us 2 7.834us 2.930us
pow 33.493us 1348.114us 2 66.986us 2696.228us
item 51.026us 50.781us 1 51.026us 50.781us
neg 28.785us 1337.341us 1 28.785us 1337.341us
conv3d 706.915us 72177.319us 4 2827.661us 288709.275us
_local_scalar_dense 41.238us 41.016us 1 41.238us 41.016us
CudnnConvolutionBackward 313.153us 5588715.538us 4 1252.611us 22354862.152us
zero_ 10.031us 10.176us 4 40.124us 40.704us
elu_backward 33.999us 6206.787us 2 67.998us 12413.574us
addcdiv_ 20.296us 20.508us 4 81.182us 82.031us
expand 18.264us 1.038us 1 18.264us 1.038us
empty 9.044us 4.731us 6 54.262us 28.386us
MeanBackward1 141.966us 695.312us 1 141.966us 695.312us
SubBackward0 84.930us 2718.719us 1 84.930us 2718.719us
_convolution 692.035us 72169.436us 4 2768.139us 288677.742us
contiguous 3.769us 1.936us 4 15.078us 7.745us
addcmul_ 26.239us 26.367us 4 104.957us 105.469us
sqrt 26.159us 26.367us 4 104.637us 105.469us
sub 21.841us 1951.752us 1 21.841us 1951.752us
Iter 3/5, loss: 1.1786322593688965
----------------------------------- --------------- --------------- --------------- --------------- ---------------
Name CPU time CUDA time Calls CPU total CUDA total
----------------------------------- --------------- --------------- --------------- --------------- ---------------
convolution 115.953us 72240.726us 4 463.811us 288962.903us
add 26.778us 5852.512us 4 107.111us 23410.049us
torch::autograd::AccumulateGrad 12776.725us 12771.322us 6 76660.350us 76627.930us
is_floating_point 4.839us 3.906us 1 4.839us 3.906us
cudnn_convolution_backward 68.366us 5420700.531us 4 273.463us 21682802.124us
EluBackward 23.620us 5873.291us 2 47.239us 11746.582us
mul_ 18.426us 17.090us 8 147.405us 136.719us
elu_ 25.794us 4043.266us 2 51.587us 8086.533us
detach_ 3.637us 3.536us 4 14.548us 14.144us
add_ 23.967us 26.367us 12 287.602us 316.406us
torch::autograd::CopyBackwards 11058073.875us 45457.031us 2 22116147.749us 90914.062us
mean 71.585us 632.812us 1 71.585us 632.812us
mul 18.398us 1548.971us 3 55.194us 4646.912us
torch::autograd::GraphRoot 7.223us 2.045us 1 7.223us 2.045us
PowBackward0 75.963us 4649.963us 1 75.963us 4649.963us
cudnn_convolution 77.195us 72222.633us 4 308.780us 288890.531us
div 28.314us 684.021us 1 28.314us 684.021us
AddBackward0 2.365us 1.465us 2 4.729us 2.930us
pow 23.043us 1359.344us 2 46.087us 2718.689us
item 42.901us 41.016us 1 42.901us 41.016us
neg 14.948us 1357.819us 1 14.948us 1357.819us
conv3d 124.441us 72244.582us 4 497.765us 288978.326us
_local_scalar_dense 33.393us 33.203us 1 33.393us 33.203us
CudnnConvolutionBackward 75.600us 5420703.720us 4 302.398us 21682814.880us
zero_ 12.206us 12.312us 4 48.822us 49.248us
elu_backward 17.523us 5870.850us 2 35.045us 11741.699us
addcdiv_ 20.186us 20.508us 4 80.742us 82.031us
expand 6.963us 1.038us 1 6.963us 1.038us
empty 12.265us 7.218us 6 73.588us 43.308us
MeanBackward1 48.782us 689.148us 1 48.782us 689.148us
SubBackward0 44.032us 2709.503us 1 44.032us 2709.503us
_convolution 107.940us 72236.916us 4 431.761us 288947.665us
contiguous 4.273us 2.292us 4 17.092us 9.170us
addcmul_ 22.434us 22.949us 4 89.738us 91.797us
sqrt 24.807us 24.902us 4 99.226us 99.609us
sub 31.449us 1940.491us 1 31.449us 1940.491us
Iter 4/5, loss: 1.1783697605133057
----------------------------------- --------------- --------------- --------------- --------------- ---------------
Name CPU time CUDA time Calls CPU total CUDA total
----------------------------------- --------------- --------------- --------------- --------------- ---------------
convolution 120.761us 71523.126us 4 483.046us 286092.502us
add 26.231us 5901.585us 4 104.926us 23606.339us
torch::autograd::AccumulateGrad 13553.335us 13550.130us 6 81320.012us 81300.781us
is_floating_point 5.019us 3.906us 1 5.019us 3.906us
cudnn_convolution_backward 62.184us 5440159.782us 4 248.737us 21760639.130us
EluBackward 22.026us 5857.666us 2 44.052us 11715.332us
mul_ 18.000us 17.090us 8 144.000us 136.719us
elu_ 38.648us 4153.336us 2 77.296us 8306.671us
detach_ 3.740us 3.656us 4 14.959us 14.624us
add_ 21.632us 24.251us 12 259.586us 291.016us
torch::autograd::CopyBackwards 11099595.407us 49291.016us 2 22199190.814us 98582.031us
mean 70.432us 629.761us 1 70.432us 629.761us
mul 15.599us 1548.960us 3 46.798us 4646.881us
torch::autograd::GraphRoot 7.865us 1.007us 1 7.865us 1.007us
PowBackward0 67.627us 4671.478us 1 67.627us 4671.478us
cudnn_convolution 81.320us 71505.100us 4 325.282us 286020.400us
div 25.087us 684.021us 1 25.087us 684.021us
AddBackward0 2.184us 1.953us 2 4.368us 3.906us
pow 26.374us 1354.767us 2 52.749us 2709.534us
item 43.441us 42.969us 1 43.441us 42.969us
neg 16.912us 1345.520us 1 16.912us 1345.520us
conv3d 129.679us 71527.043us 4 518.715us 286108.173us
_local_scalar_dense 34.405us 33.203us 1 34.405us 33.203us
CudnnConvolutionBackward 69.417us 5440162.598us 4 277.670us 21760650.391us
zero_ 12.241us 12.360us 4 48.962us 49.440us
elu_backward 14.852us 5854.980us 2 29.705us 11709.961us
addcdiv_ 19.028us 19.531us 4 76.113us 78.125us
expand 6.763us 1.038us 1 6.763us 1.038us
empty 10.887us 6.895us 6 65.322us 41.371us
MeanBackward1 43.171us 689.178us 1 43.171us 689.178us
SubBackward0 41.278us 2700.287us 1 41.278us 2700.287us
_convolution 112.326us 71519.468us 4 449.305us 286077.870us
contiguous 4.483us 2.036us 4 17.934us 8.146us
addcmul_ 21.761us 22.461us 4 87.043us 89.844us
sqrt 24.070us 24.902us 4 96.280us 99.609us
sub 31.379us 1949.677us 1 31.379us 1949.677us
Iter 5/5, loss: 1.178107738494873
----------------------------------- --------------- --------------- --------------- --------------- ---------------
Name CPU time CUDA time Calls CPU total CUDA total
----------------------------------- --------------- --------------- --------------- --------------- ---------------
convolution 109.769us 72765.561us 4 439.074us 291062.244us
add 29.190us 5999.966us 4 116.760us 23999.863us
torch::autograd::AccumulateGrad 14396.529us 14385.579us 6 86379.172us 86313.477us
is_floating_point 4.970us 3.906us 1 4.970us 3.906us
cudnn_convolution_backward 108.772us 5573916.893us 4 435.088us 22295667.572us
EluBackward 34.615us 6087.891us 2 69.231us 12175.781us
mul_ 21.493us 21.973us 8 171.941us 175.781us
elu_ 24.235us 4134.392us 2 48.470us 8268.784us
detach_ 3.482us 3.368us 4 13.928us 13.472us
add_ 27.648us 29.785us 12 331.781us 357.422us
torch::autograd::CopyBackwards 11370697.976us 49844.727us 2 22741395.952us 99689.453us
mean 67.377us 766.968us 1 67.377us 766.968us
mul 28.296us 1617.910us 3 84.889us 4853.729us
torch::autograd::GraphRoot 10.841us 2.045us 1 10.841us 2.045us
PowBackward0 121.538us 5020.691us 1 121.538us 5020.691us
cudnn_convolution 73.385us 72747.189us 4 293.542us 290988.754us
div 44.544us 684.052us 1 44.544us 684.052us
AddBackward0 3.737us 1.465us 2 7.474us 2.930us
pow 27.546us 1503.754us 2 55.093us 3007.507us
item 52.789us 52.734us 1 52.789us 52.734us
neg 23.675us 1350.677us 1 23.675us 1350.677us
conv3d 117.937us 72769.876us 4 471.746us 291079.504us
_local_scalar_dense 42.800us 42.969us 1 42.800us 42.969us
CudnnConvolutionBackward 121.100us 5573919.838us 4 484.401us 22295679.352us
zero_ 11.585us 11.672us 4 46.338us 46.688us
elu_backward 25.057us 6085.449us 2 50.114us 12170.898us
addcdiv_ 19.622us 20.508us 4 78.486us 82.031us
expand 10.650us 2.045us 1 10.650us 2.045us
empty 13.756us 5.621us 6 82.536us 33.726us
MeanBackward1 74.240us 689.148us 1 74.240us 689.148us
SubBackward0 64.431us 2703.369us 1 64.431us 2703.369us
_convolution 101.929us 72761.378us 4 407.716us 291045.511us
contiguous 4.172us 2.470us 4 16.690us 9.880us
addcmul_ 25.999us 26.367us 4 103.995us 105.469us
sqrt 27.662us 28.320us 4 110.647us 113.281us
sub 29.385us 1980.438us 1 29.385us 1980.438us
************************************** TENSOR SIZE (1, 2, 224, 224, 224) **************************************
Iter 1/5, loss: 1.1761516332626343
----------------------------------- --------------- --------------- --------------- --------------- ---------------
Name CPU time CUDA time Calls CPU total CUDA total
----------------------------------- --------------- --------------- --------------- --------------- ---------------
convolution 1236914.297us 1236134.580us 4 4947657.189us 4944538.319us
add 607.102us 3965.820us 4 2428.408us 15863.281us
torch::autograd::AccumulateGrad 29.050us 9.440us 6 174.298us 56.641us
is_floating_point 4.559us 3.906us 1 4.559us 3.906us
cudnn_convolution_backward 6989385.555us 7013367.920us 4 27957542.222us 28053471.680us
EluBackward 50.940us 3932.129us 2 101.881us 7864.258us
mul_ 20.517us 19.531us 8 164.138us 156.250us
elu_ 29.000us 2724.854us 2 58.000us 5449.707us
detach_ 2.161us 1.976us 4 8.645us 7.904us
add_ 22.463us 25.065us 12 269.555us 300.781us
torch::autograd::CopyBackwards 83260.132us 25505.859us 2 166520.263us 51011.719us
mean 113.083us 501.465us 1 113.083us 501.465us
mul 285.967us 1132.324us 3 857.901us 3396.973us
torch::autograd::GraphRoot 10.530us 5.371us 1 10.530us 5.371us
PowBackward0 1428.091us 3416.992us 1 1428.091us 3416.992us
cudnn_convolution 1236870.156us 1236119.782us 4 4947480.625us 4944479.128us
div 67.136us 497.559us 1 67.136us 497.559us
AddBackward0 4.103us 1.953us 2 8.206us 3.906us
pow 320.837us 1048.828us 2 641.675us 2097.656us
item 43.181us 42.969us 1 43.181us 42.969us
neg 37.340us 908.203us 1 37.340us 908.203us
conv3d 1236922.640us 1236137.450us 4 4947690.561us 4944549.800us
_local_scalar_dense 33.603us 31.250us 1 33.603us 31.250us
CudnnConvolutionBackward 6989401.335us 7013370.850us 4 27957605.340us 28053483.398us
zero_ 9.045us 9.136us 4 36.180us 36.544us
elu_backward 39.438us 3929.688us 2 78.877us 7859.375us
addcdiv_ 19.932us 20.508us 4 79.729us 82.031us
expand 16.431us 16.602us 1 16.431us 16.602us
empty 11.435us 22.589us 6 68.609us 135.535us
MeanBackward1 105.769us 527.832us 1 105.769us 527.832us
SubBackward0 97.673us 1956.055us 1 97.673us 1956.055us
_convolution 1236906.462us 1236131.641us 4 4947625.848us 4944526.563us
contiguous 3.239us 1.553us 4 12.955us 6.214us
addcmul_ 22.808us 23.438us 4 91.231us 93.750us
sqrt 25.165us 25.391us 4 100.659us 101.562us
sub 77.235us 1311.035us 1 77.235us 1311.035us
Iter 2/5, loss: 1.175890564918518
----------------------------------- --------------- --------------- --------------- --------------- ---------------
Name CPU time CUDA time Calls CPU total CUDA total
----------------------------------- --------------- --------------- --------------- --------------- ---------------
convolution 797.449us 48435.772us 4 3189.797us 193743.089us
add 224.852us 3962.364us 4 899.408us 15849.457us
torch::autograd::AccumulateGrad 9249.173us 9245.717us 6 55495.039us 55474.304us
is_floating_point 5.520us 5.127us 1 5.520us 5.127us
cudnn_convolution_backward 139.965us 174441.994us 4 559.861us 697767.975us
EluBackward 19.777us 3942.917us 2 39.554us 7885.834us
mul_ 19.295us 20.348us 8 154.359us 162.781us
elu_ 23.203us 2804.230us 2 46.407us 5608.459us
detach_ 4.075us 3.888us 4 16.300us 15.552us
add_ 27.226us 25.467us 12 326.714us 305.603us
torch::autograd::CopyBackwards 494411.110us 32771.393us 2 988822.220us 65542.786us
mean 47.419us 431.107us 1 47.419us 431.107us
mul 86.589us 1037.994us 3 259.767us 3113.983us
torch::autograd::GraphRoot 8.927us 1.022us 1 8.927us 1.022us
PowBackward0 278.924us 3159.042us 1 278.924us 3159.042us
cudnn_convolution 761.547us 48416.495us 4 3046.188us 193665.981us
div 25.618us 446.564us 1 25.618us 446.564us
AddBackward0 2.219us 1.556us 2 4.438us 3.113us
pow 18.560us 918.015us 2 37.120us 1836.029us
item 69.771us 68.665us 1 69.771us 68.665us
neg 19.707us 903.168us 1 19.707us 903.168us
conv3d 805.214us 48440.293us 4 3220.855us 193761.172us
_local_scalar_dense 57.678us 57.861us 1 57.678us 57.861us
CudnnConvolutionBackward 147.016us 174444.302us 4 588.063us 697777.206us
zero_ 12.844us 12.984us 4 51.377us 51.936us
elu_backward 14.417us 3940.353us 2 28.834us 7880.707us
addcdiv_ 25.941us 18.753us 4 103.764us 75.012us
expand 6.392us 2.045us 1 6.392us 2.045us
empty 9.033us 6.198us 6 54.200us 37.186us
MeanBackward1 43.692us 452.606us 1 43.692us 452.606us
SubBackward0 45.746us 1807.358us 1 45.746us 1807.358us
_convolution 790.243us 48431.719us 4 3160.973us 193726.874us
contiguous 4.008us 2.871us 4 16.031us 11.486us
addcmul_ 30.222us 61.081us 4 120.887us 244.324us
sqrt 29.069us 16.129us 4 116.278us 64.514us
sub 18.314us 1302.536us 1 18.314us 1302.536us
Iter 3/5, loss: 1.1756300926208496
----------------------------------- --------------- --------------- --------------- --------------- ---------------
Name CPU time CUDA time Calls CPU total CUDA total
----------------------------------- --------------- --------------- --------------- --------------- ---------------
convolution 116.248us 46487.119us 4 464.994us 185948.476us
add 22.570us 3933.716us 4 90.280us 15734.863us
torch::autograd::AccumulateGrad 8842.036us 8837.311us 6 53052.215us 53023.865us
is_floating_point 4.669us 4.272us 1 4.669us 4.272us
cudnn_convolution_backward 73.899us 173217.297us 4 295.595us 692869.186us
EluBackward 21.530us 3931.137us 2 43.060us 7862.274us
mul_ 19.465us 18.654us 8 155.722us 149.231us
elu_ 23.519us 2696.190us 2 47.038us 5392.380us
detach_ 4.203us 4.032us 4 16.811us 16.128us
add_ 24.377us 27.507us 12 292.529us 330.078us
torch::autograd::CopyBackwards 487694.991us 30620.026us 2 975389.982us 61240.051us
mean 64.531us 436.234us 1 64.531us 436.234us
mul 18.071us 1031.509us 3 54.213us 3094.528us
torch::autograd::GraphRoot 5.049us 2.045us 1 5.049us 2.045us
PowBackward0 80.541us 3103.745us 1 80.541us 3103.745us
cudnn_convolution 77.646us 46466.238us 4 310.583us 185864.951us
div 26.891us 447.479us 1 26.891us 447.479us
AddBackward0 2.364us 1.038us 2 4.728us 2.075us
pow 20.969us 904.190us 2 41.938us 1808.380us
item 57.548us 57.190us 1 57.548us 57.190us
neg 14.888us 895.996us 1 14.888us 895.996us
conv3d 125.055us 46490.504us 4 500.218us 185962.015us
_local_scalar_dense 47.980us 47.302us 1 47.980us 47.302us
CudnnConvolutionBackward 80.609us 173220.341us 4 322.436us 692881.363us
zero_ 13.869us 13.984us 4 55.474us 55.936us
elu_backward 15.629us 3929.077us 2 31.258us 7858.154us
addcdiv_ 20.206us 20.645us 4 80.822us 82.581us
expand 6.933us 2.060us 1 6.933us 2.060us
empty 10.876us 6.186us 6 65.253us 37.117us
MeanBackward1 46.186us 452.606us 1 46.186us 452.606us
SubBackward0 38.623us 1795.059us 1 38.623us 1795.059us
_convolution 108.145us 46482.273us 4 432.581us 185929.090us
contiguous 4.421us 2.145us 4 17.683us 8.579us
addcmul_ 22.678us 23.300us 4 90.711us 93.201us
sqrt 28.191us 28.915us 4 112.762us 115.662us
sub 26.008us 1310.715us 1 26.008us 1310.715us
Iter 4/5, loss: 1.1753698587417603
----------------------------------- --------------- --------------- --------------- --------------- ---------------
Name CPU time CUDA time Calls CPU total CUDA total
----------------------------------- --------------- --------------- --------------- --------------- ---------------
convolution 116.651us 46888.871us 4 466.606us 187555.486us
add 49.208us 3926.529us 4 196.831us 15706.116us
torch::autograd::AccumulateGrad 8926.092us 8920.151us 6 53556.550us 53520.905us
is_floating_point 6.072us 5.798us 1 6.072us 5.798us
cudnn_convolution_backward 70.928us 173593.330us 4 283.713us 694373.322us
EluBackward 23.459us 3928.055us 2 46.918us 7856.110us
mul_ 22.554us 21.828us 8 180.431us 174.622us
elu_ 21.129us 2700.798us 2 42.259us 5401.596us
detach_ 3.642us 3.496us 4 14.567us 13.984us
add_ 27.942us 31.041us 12 335.300us 372.498us
torch::autograd::CopyBackwards 488527.980us 30531.403us 2 977055.960us 61062.805us
mean 61.966us 428.024us 1 61.966us 428.024us
mul 17.196us 1044.484us 3 51.588us 3133.453us
torch::autograd::GraphRoot 5.090us 1.038us 1 5.090us 1.038us
PowBackward0 73.228us 3133.438us 1 73.228us 3133.438us
cudnn_convolution 71.765us 46870.141us 4 287.059us 187480.566us
div 37.220us 446.472us 1 37.220us 446.472us
AddBackward0 2.340us 2.045us 2 4.679us 4.089us
pow 20.854us 895.996us 2 41.708us 1791.992us
item 60.704us 59.998us 1 60.704us 59.998us
neg 14.828us 910.629us 1 14.828us 910.629us
conv3d 125.385us 46892.571us 4 501.542us 187570.285us
_local_scalar_dense 48.712us 47.974us 1 48.712us 47.974us
CudnnConvolutionBackward 79.269us 173596.680us 4 317.075us 694386.719us
zero_ 11.850us 11.992us 4 47.400us 47.968us
elu_backward 17.413us 3924.988us 2 34.826us 7849.976us
addcdiv_ 24.158us 25.864us 4 96.631us 103.455us
expand 7.293us 2.045us 1 7.293us 2.045us
empty 10.386us 6.703us 6 62.317us 40.219us
MeanBackward1 56.777us 451.584us 1 56.777us 451.584us
SubBackward0 39.133us 1815.552us 1 39.133us 1815.552us
_convolution 109.027us 46884.776us 4 436.110us 187539.105us
contiguous 8.406us 2.598us 4 33.624us 10.392us
addcmul_ 24.448us 24.857us 4 97.792us 99.426us
sqrt 31.066us 32.166us 4 124.263us 128.662us
sub 26.490us 1307.648us 1 26.490us 1307.648us
Iter 5/5, loss: 1.175110101699829
----------------------------------- --------------- --------------- --------------- --------------- ---------------
Name CPU time CUDA time Calls CPU total CUDA total
----------------------------------- --------------- --------------- --------------- --------------- ---------------
convolution 85.994us 47133.496us 4 343.976us 188533.985us
add 24.927us 3920.383us 4 99.708us 15681.534us
torch::autograd::AccumulateGrad 8494.010us 8483.210us 6 50964.062us 50899.261us
is_floating_point 6.081us 6.104us 1 6.081us 6.104us
cudnn_convolution_backward 125.165us 173178.101us 4 500.661us 692712.402us
EluBackward 37.551us 3927.551us 2 75.102us 7855.103us
mul_ 23.331us 23.003us 8 186.650us 184.021us
elu_ 17.918us 2693.626us 2 35.837us 5387.253us
detach_ 3.326us 3.208us 4 13.305us 12.832us
add_ 27.060us 29.989us 12 324.719us 359.863us
torch::autograd::CopyBackwards 488364.907us 29978.973us 2 976729.813us 59957.947us
mean 53.360us 431.107us 1 53.360us 431.107us
mul 28.347us 1118.891us 3 85.040us 3356.674us
torch::autograd::GraphRoot 6.743us 1.022us 1 6.743us 1.022us
PowBackward0 127.039us 3360.764us 1 127.039us 3360.764us
cudnn_convolution 58.665us 47116.391us 4 234.661us 188465.565us
div 88.406us 447.739us 1 88.406us 447.739us
AddBackward0 3.687us 1.038us 2 7.373us 2.075us
pow 29.014us 900.612us 2 58.029us 1801.224us
item 67.256us 66.772us 1 67.256us 66.772us
neg 23.304us 1208.313us 1 23.304us 1208.313us
conv3d 92.206us 47137.258us 4 368.822us 188549.031us
_local_scalar_dense 49.332us 49.072us 1 49.332us 49.072us
CudnnConvolutionBackward 138.998us 173181.194us 4 555.993us 692724.777us
zero_ 10.750us 10.920us 4 43.001us 43.680us
elu_backward 26.529us 3923.965us 2 53.059us 7847.931us
addcdiv_ 24.982us 26.031us 4 99.928us 104.126us
expand 15.599us 2.045us 1 15.599us 2.045us
empty 9.832us 5.973us 6 58.991us 35.836us
MeanBackward1 123.502us 453.629us 1 123.502us 453.629us
SubBackward0 62.267us 2116.623us 1 62.267us 2116.623us
_convolution 80.326us 47129.827us 4 321.302us 188519.307us
contiguous 3.246us 2.174us 4 12.985us 8.697us
addcmul_ 24.637us 25.116us 4 98.547us 100.464us
sqrt 29.778us 30.640us 4 119.112us 122.559us
sub 21.421us 1339.386us 1 21.421us 1339.386us
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import copy
class ConvBlock3d(nn.Module):
""" Define a basic block (conv => bn => relu ) """
def __init__(self, in_channels, out_channels):
super(ConvBlock3d, self).__init__()
self.conv = nn.Sequential(
nn.Conv3d(in_channels, out_channels, (3, 3, 3), padding=1, bias=False),
nn.ELU(inplace=True),
)
def forward(self, X):
x = self.conv(X) + X
return x
class Encoder(nn.Module):
def __init__(self, input_channels, nb_f_maps=6):
super(Encoder, self).__init__()
self.inp = nn.Sequential(
nn.Conv3d(input_channels, nb_f_maps, 5, 1, 2, bias=False),
ConvBlock3d(nb_f_maps, nb_f_maps),
ConvBlock3d(nb_f_maps, nb_f_maps),
)
def forward(self, X):
return self.inp(X)
class AE(nn.Module):
def __init__(self, input_channels, nb_classes, nb_f_maps=6):
super(AE, self).__init__()
self.encoder = Encoder(input_channels, nb_f_maps)
self.conv = nn.Conv3d(nb_f_maps, nb_classes, 1, bias=False)
def forward(self, X):
return self.conv(self.encoder(X))
torch.backends.cudnn.benchmark = True
if __name__ == "__main__":
model = AE(2, 2)
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-5)
volume = torch.randn(1, 2, 256, 256, 256, requires_grad=True).cuda()
target = torch.randn(1, 2, 256, 256, 256, requires_grad=True).cuda()
for i in range(5): # Epoch loop
with torch.autograd.profiler.profile(use_cuda=True) as prof:
optimizer.zero_grad()
res = model(volume)
loss = (res - target).pow(2).mean()
loss.backward()
optimizer.step()
print("Iter {}/{}, loss: {}".format(i+1, 5, loss.item()))
print(prof.key_averages())
volume = torch.randn(1, 2, 224, 224, 224, requires_grad=True).cuda()
target = torch.randn(1, 2, 224, 224, 224, requires_grad=True).cuda()
for i in range(5): # Epoch loop
with torch.autograd.profiler.profile(use_cuda=True) as prof:
optimizer.zero_grad()
res = model(volume)
loss = (res - target).pow(2).mean()
loss.backward()
optimizer.step()
print("Iter {}/{}, loss: {}".format(i+1, 5, loss.item()))
print(prof.key_averages())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment