Last active
December 8, 2018 14:14
-
-
Save czotti/4dc532927ba32481e5b538b99eff2894 to your computer and use it in GitHub Desktop.
Speed issues with PyTorch 0.4.1 and 1.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
************************************** TENSOR SIZE (1, 2, 256, 256, 256) ************************************** | |
Iter 1/5, loss: 1.1791579723358154 | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
Name CPU time CUDA time Calls CPU total CUDA total | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
convolution 1993651.502us 1992817.026us 4 7974606.009us 7971268.105us | |
add 1618.376us 6155.640us 4 6473.504us 24622.559us | |
torch::autograd::AccumulateGrad 6500.853us 7.487us 6 39005.119us 44.922us | |
is_floating_point 9.718us 7.812us 1 9.718us 7.812us | |
cudnn_convolution_backward 14125154.892us 15425690.918us 4 56500619.569us 61702763.672us | |
EluBackward 921.245us 6175.781us 2 1842.491us 12351.562us | |
mul_ 18.442us 17.578us 8 147.535us 140.625us | |
elu_ 591.556us 4349.609us 2 1183.112us 8699.219us | |
add_ 22.765us 26.042us 12 273.183us 312.500us | |
torch::autograd::CopyBackwards 2655614.432us 40296.875us 2 5311228.865us 80593.750us | |
mean 5391.290us 639.648us 1 5391.290us 639.648us | |
mul 333.253us 1546.875us 3 999.759us 4640.625us | |
torch::autograd::GraphRoot 22.302us 7.324us 1 22.302us 7.324us | |
PowBackward0 1596.300us 4652.832us 1 1596.300us 4652.832us | |
cudnn_convolution 1993596.314us 1992796.013us 4 7974385.254us 7971184.053us | |
div 106.370us 744.629us 1 106.370us 744.629us | |
AddBackward0 4.344us 0.000us 2 8.687us 0.000us | |
pow 2354.389us 1340.088us 2 4708.778us 2680.176us | |
item 62.608us 62.500us 1 62.608us 62.500us | |
neg 47.268us 1349.609us 1 47.268us 1349.609us | |
conv3d 1993968.818us 1993127.823us 4 7975875.273us 7972511.291us | |
_local_scalar_dense 46.377us 46.875us 1 46.377us 46.875us | |
SubBackward0 110.337us 2691.895us 1 110.337us 2691.895us | |
elu_backward 898.719us 6172.852us 2 1797.438us 12345.703us | |
addcdiv_ 21.728us 23.438us 4 86.912us 93.750us | |
expand 50.636us 44.922us 1 50.636us 44.922us | |
empty 14.302us 5.584us 6 85.811us 33.503us | |
MeanBackward1 3023.451us 3645.508us 1 3023.451us 3645.508us | |
CudnnConvolutionBackward 14125176.846us 15425693.115us 4 56500707.384us 61702772.461us | |
_convolution 1993640.512us 1992810.689us 4 7974562.046us 7971242.754us | |
contiguous 5.430us 2.779us 4 21.721us 11.115us | |
addcmul_ 26.447us 27.344us 4 105.788us 109.375us | |
sqrt 328.292us 330.078us 4 1313.167us 1320.312us | |
sub 95.299us 1945.801us 1 95.299us 1945.801us | |
Iter 2/5, loss: 1.1788948774337769 | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
Name CPU time CUDA time Calls CPU total CUDA total | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
convolution 699.150us 72173.980us 4 2796.602us 288695.921us | |
add 316.585us 6046.371us 4 1266.338us 24185.486us | |
torch::autograd::AccumulateGrad 12638.065us 12623.779us 6 75828.392us 75742.676us | |
is_floating_point 4.729us 3.906us 1 4.729us 3.906us | |
cudnn_convolution_backward 299.305us 5588712.349us 4 1197.218us 22354849.396us | |
EluBackward 47.975us 6209.229us 2 95.949us 12418.457us | |
mul_ 19.369us 19.043us 8 154.951us 152.344us | |
elu_ 25.809us 4032.509us 2 51.618us 8065.018us | |
detach_ 4.103us 3.968us 4 16.412us 15.872us | |
add_ 24.862us 27.832us 12 298.341us 333.984us | |
torch::autograd::CopyBackwards 11391234.402us 44634.766us 2 22782468.804us 89269.531us | |
mean 46.818us 633.850us 1 46.818us 633.850us | |
mul 199.558us 1555.878us 3 598.675us 4667.633us | |
torch::autograd::GraphRoot 8.897us 1.038us 1 8.897us 1.038us | |
PowBackward0 639.862us 4645.874us 1 639.862us 4645.874us | |
cudnn_convolution 664.946us 72155.972us 4 2659.785us 288623.887us | |
div 81.333us 689.148us 1 81.333us 689.148us | |
AddBackward0 3.917us 1.465us 2 7.834us 2.930us | |
pow 33.493us 1348.114us 2 66.986us 2696.228us | |
item 51.026us 50.781us 1 51.026us 50.781us | |
neg 28.785us 1337.341us 1 28.785us 1337.341us | |
conv3d 706.915us 72177.319us 4 2827.661us 288709.275us | |
_local_scalar_dense 41.238us 41.016us 1 41.238us 41.016us | |
CudnnConvolutionBackward 313.153us 5588715.538us 4 1252.611us 22354862.152us | |
zero_ 10.031us 10.176us 4 40.124us 40.704us | |
elu_backward 33.999us 6206.787us 2 67.998us 12413.574us | |
addcdiv_ 20.296us 20.508us 4 81.182us 82.031us | |
expand 18.264us 1.038us 1 18.264us 1.038us | |
empty 9.044us 4.731us 6 54.262us 28.386us | |
MeanBackward1 141.966us 695.312us 1 141.966us 695.312us | |
SubBackward0 84.930us 2718.719us 1 84.930us 2718.719us | |
_convolution 692.035us 72169.436us 4 2768.139us 288677.742us | |
contiguous 3.769us 1.936us 4 15.078us 7.745us | |
addcmul_ 26.239us 26.367us 4 104.957us 105.469us | |
sqrt 26.159us 26.367us 4 104.637us 105.469us | |
sub 21.841us 1951.752us 1 21.841us 1951.752us | |
Iter 3/5, loss: 1.1786322593688965 | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
Name CPU time CUDA time Calls CPU total CUDA total | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
convolution 115.953us 72240.726us 4 463.811us 288962.903us | |
add 26.778us 5852.512us 4 107.111us 23410.049us | |
torch::autograd::AccumulateGrad 12776.725us 12771.322us 6 76660.350us 76627.930us | |
is_floating_point 4.839us 3.906us 1 4.839us 3.906us | |
cudnn_convolution_backward 68.366us 5420700.531us 4 273.463us 21682802.124us | |
EluBackward 23.620us 5873.291us 2 47.239us 11746.582us | |
mul_ 18.426us 17.090us 8 147.405us 136.719us | |
elu_ 25.794us 4043.266us 2 51.587us 8086.533us | |
detach_ 3.637us 3.536us 4 14.548us 14.144us | |
add_ 23.967us 26.367us 12 287.602us 316.406us | |
torch::autograd::CopyBackwards 11058073.875us 45457.031us 2 22116147.749us 90914.062us | |
mean 71.585us 632.812us 1 71.585us 632.812us | |
mul 18.398us 1548.971us 3 55.194us 4646.912us | |
torch::autograd::GraphRoot 7.223us 2.045us 1 7.223us 2.045us | |
PowBackward0 75.963us 4649.963us 1 75.963us 4649.963us | |
cudnn_convolution 77.195us 72222.633us 4 308.780us 288890.531us | |
div 28.314us 684.021us 1 28.314us 684.021us | |
AddBackward0 2.365us 1.465us 2 4.729us 2.930us | |
pow 23.043us 1359.344us 2 46.087us 2718.689us | |
item 42.901us 41.016us 1 42.901us 41.016us | |
neg 14.948us 1357.819us 1 14.948us 1357.819us | |
conv3d 124.441us 72244.582us 4 497.765us 288978.326us | |
_local_scalar_dense 33.393us 33.203us 1 33.393us 33.203us | |
CudnnConvolutionBackward 75.600us 5420703.720us 4 302.398us 21682814.880us | |
zero_ 12.206us 12.312us 4 48.822us 49.248us | |
elu_backward 17.523us 5870.850us 2 35.045us 11741.699us | |
addcdiv_ 20.186us 20.508us 4 80.742us 82.031us | |
expand 6.963us 1.038us 1 6.963us 1.038us | |
empty 12.265us 7.218us 6 73.588us 43.308us | |
MeanBackward1 48.782us 689.148us 1 48.782us 689.148us | |
SubBackward0 44.032us 2709.503us 1 44.032us 2709.503us | |
_convolution 107.940us 72236.916us 4 431.761us 288947.665us | |
contiguous 4.273us 2.292us 4 17.092us 9.170us | |
addcmul_ 22.434us 22.949us 4 89.738us 91.797us | |
sqrt 24.807us 24.902us 4 99.226us 99.609us | |
sub 31.449us 1940.491us 1 31.449us 1940.491us | |
Iter 4/5, loss: 1.1783697605133057 | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
Name CPU time CUDA time Calls CPU total CUDA total | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
convolution 120.761us 71523.126us 4 483.046us 286092.502us | |
add 26.231us 5901.585us 4 104.926us 23606.339us | |
torch::autograd::AccumulateGrad 13553.335us 13550.130us 6 81320.012us 81300.781us | |
is_floating_point 5.019us 3.906us 1 5.019us 3.906us | |
cudnn_convolution_backward 62.184us 5440159.782us 4 248.737us 21760639.130us | |
EluBackward 22.026us 5857.666us 2 44.052us 11715.332us | |
mul_ 18.000us 17.090us 8 144.000us 136.719us | |
elu_ 38.648us 4153.336us 2 77.296us 8306.671us | |
detach_ 3.740us 3.656us 4 14.959us 14.624us | |
add_ 21.632us 24.251us 12 259.586us 291.016us | |
torch::autograd::CopyBackwards 11099595.407us 49291.016us 2 22199190.814us 98582.031us | |
mean 70.432us 629.761us 1 70.432us 629.761us | |
mul 15.599us 1548.960us 3 46.798us 4646.881us | |
torch::autograd::GraphRoot 7.865us 1.007us 1 7.865us 1.007us | |
PowBackward0 67.627us 4671.478us 1 67.627us 4671.478us | |
cudnn_convolution 81.320us 71505.100us 4 325.282us 286020.400us | |
div 25.087us 684.021us 1 25.087us 684.021us | |
AddBackward0 2.184us 1.953us 2 4.368us 3.906us | |
pow 26.374us 1354.767us 2 52.749us 2709.534us | |
item 43.441us 42.969us 1 43.441us 42.969us | |
neg 16.912us 1345.520us 1 16.912us 1345.520us | |
conv3d 129.679us 71527.043us 4 518.715us 286108.173us | |
_local_scalar_dense 34.405us 33.203us 1 34.405us 33.203us | |
CudnnConvolutionBackward 69.417us 5440162.598us 4 277.670us 21760650.391us | |
zero_ 12.241us 12.360us 4 48.962us 49.440us | |
elu_backward 14.852us 5854.980us 2 29.705us 11709.961us | |
addcdiv_ 19.028us 19.531us 4 76.113us 78.125us | |
expand 6.763us 1.038us 1 6.763us 1.038us | |
empty 10.887us 6.895us 6 65.322us 41.371us | |
MeanBackward1 43.171us 689.178us 1 43.171us 689.178us | |
SubBackward0 41.278us 2700.287us 1 41.278us 2700.287us | |
_convolution 112.326us 71519.468us 4 449.305us 286077.870us | |
contiguous 4.483us 2.036us 4 17.934us 8.146us | |
addcmul_ 21.761us 22.461us 4 87.043us 89.844us | |
sqrt 24.070us 24.902us 4 96.280us 99.609us | |
sub 31.379us 1949.677us 1 31.379us 1949.677us | |
Iter 5/5, loss: 1.178107738494873 | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
Name CPU time CUDA time Calls CPU total CUDA total | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
convolution 109.769us 72765.561us 4 439.074us 291062.244us | |
add 29.190us 5999.966us 4 116.760us 23999.863us | |
torch::autograd::AccumulateGrad 14396.529us 14385.579us 6 86379.172us 86313.477us | |
is_floating_point 4.970us 3.906us 1 4.970us 3.906us | |
cudnn_convolution_backward 108.772us 5573916.893us 4 435.088us 22295667.572us | |
EluBackward 34.615us 6087.891us 2 69.231us 12175.781us | |
mul_ 21.493us 21.973us 8 171.941us 175.781us | |
elu_ 24.235us 4134.392us 2 48.470us 8268.784us | |
detach_ 3.482us 3.368us 4 13.928us 13.472us | |
add_ 27.648us 29.785us 12 331.781us 357.422us | |
torch::autograd::CopyBackwards 11370697.976us 49844.727us 2 22741395.952us 99689.453us | |
mean 67.377us 766.968us 1 67.377us 766.968us | |
mul 28.296us 1617.910us 3 84.889us 4853.729us | |
torch::autograd::GraphRoot 10.841us 2.045us 1 10.841us 2.045us | |
PowBackward0 121.538us 5020.691us 1 121.538us 5020.691us | |
cudnn_convolution 73.385us 72747.189us 4 293.542us 290988.754us | |
div 44.544us 684.052us 1 44.544us 684.052us | |
AddBackward0 3.737us 1.465us 2 7.474us 2.930us | |
pow 27.546us 1503.754us 2 55.093us 3007.507us | |
item 52.789us 52.734us 1 52.789us 52.734us | |
neg 23.675us 1350.677us 1 23.675us 1350.677us | |
conv3d 117.937us 72769.876us 4 471.746us 291079.504us | |
_local_scalar_dense 42.800us 42.969us 1 42.800us 42.969us | |
CudnnConvolutionBackward 121.100us 5573919.838us 4 484.401us 22295679.352us | |
zero_ 11.585us 11.672us 4 46.338us 46.688us | |
elu_backward 25.057us 6085.449us 2 50.114us 12170.898us | |
addcdiv_ 19.622us 20.508us 4 78.486us 82.031us | |
expand 10.650us 2.045us 1 10.650us 2.045us | |
empty 13.756us 5.621us 6 82.536us 33.726us | |
MeanBackward1 74.240us 689.148us 1 74.240us 689.148us | |
SubBackward0 64.431us 2703.369us 1 64.431us 2703.369us | |
_convolution 101.929us 72761.378us 4 407.716us 291045.511us | |
contiguous 4.172us 2.470us 4 16.690us 9.880us | |
addcmul_ 25.999us 26.367us 4 103.995us 105.469us | |
sqrt 27.662us 28.320us 4 110.647us 113.281us | |
sub 29.385us 1980.438us 1 29.385us 1980.438us | |
************************************** TENSOR SIZE (1, 2, 224, 224, 224) ************************************** | |
Iter 1/5, loss: 1.1761516332626343 | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
Name CPU time CUDA time Calls CPU total CUDA total | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
convolution 1236914.297us 1236134.580us 4 4947657.189us 4944538.319us | |
add 607.102us 3965.820us 4 2428.408us 15863.281us | |
torch::autograd::AccumulateGrad 29.050us 9.440us 6 174.298us 56.641us | |
is_floating_point 4.559us 3.906us 1 4.559us 3.906us | |
cudnn_convolution_backward 6989385.555us 7013367.920us 4 27957542.222us 28053471.680us | |
EluBackward 50.940us 3932.129us 2 101.881us 7864.258us | |
mul_ 20.517us 19.531us 8 164.138us 156.250us | |
elu_ 29.000us 2724.854us 2 58.000us 5449.707us | |
detach_ 2.161us 1.976us 4 8.645us 7.904us | |
add_ 22.463us 25.065us 12 269.555us 300.781us | |
torch::autograd::CopyBackwards 83260.132us 25505.859us 2 166520.263us 51011.719us | |
mean 113.083us 501.465us 1 113.083us 501.465us | |
mul 285.967us 1132.324us 3 857.901us 3396.973us | |
torch::autograd::GraphRoot 10.530us 5.371us 1 10.530us 5.371us | |
PowBackward0 1428.091us 3416.992us 1 1428.091us 3416.992us | |
cudnn_convolution 1236870.156us 1236119.782us 4 4947480.625us 4944479.128us | |
div 67.136us 497.559us 1 67.136us 497.559us | |
AddBackward0 4.103us 1.953us 2 8.206us 3.906us | |
pow 320.837us 1048.828us 2 641.675us 2097.656us | |
item 43.181us 42.969us 1 43.181us 42.969us | |
neg 37.340us 908.203us 1 37.340us 908.203us | |
conv3d 1236922.640us 1236137.450us 4 4947690.561us 4944549.800us | |
_local_scalar_dense 33.603us 31.250us 1 33.603us 31.250us | |
CudnnConvolutionBackward 6989401.335us 7013370.850us 4 27957605.340us 28053483.398us | |
zero_ 9.045us 9.136us 4 36.180us 36.544us | |
elu_backward 39.438us 3929.688us 2 78.877us 7859.375us | |
addcdiv_ 19.932us 20.508us 4 79.729us 82.031us | |
expand 16.431us 16.602us 1 16.431us 16.602us | |
empty 11.435us 22.589us 6 68.609us 135.535us | |
MeanBackward1 105.769us 527.832us 1 105.769us 527.832us | |
SubBackward0 97.673us 1956.055us 1 97.673us 1956.055us | |
_convolution 1236906.462us 1236131.641us 4 4947625.848us 4944526.563us | |
contiguous 3.239us 1.553us 4 12.955us 6.214us | |
addcmul_ 22.808us 23.438us 4 91.231us 93.750us | |
sqrt 25.165us 25.391us 4 100.659us 101.562us | |
sub 77.235us 1311.035us 1 77.235us 1311.035us | |
Iter 2/5, loss: 1.175890564918518 | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
Name CPU time CUDA time Calls CPU total CUDA total | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
convolution 797.449us 48435.772us 4 3189.797us 193743.089us | |
add 224.852us 3962.364us 4 899.408us 15849.457us | |
torch::autograd::AccumulateGrad 9249.173us 9245.717us 6 55495.039us 55474.304us | |
is_floating_point 5.520us 5.127us 1 5.520us 5.127us | |
cudnn_convolution_backward 139.965us 174441.994us 4 559.861us 697767.975us | |
EluBackward 19.777us 3942.917us 2 39.554us 7885.834us | |
mul_ 19.295us 20.348us 8 154.359us 162.781us | |
elu_ 23.203us 2804.230us 2 46.407us 5608.459us | |
detach_ 4.075us 3.888us 4 16.300us 15.552us | |
add_ 27.226us 25.467us 12 326.714us 305.603us | |
torch::autograd::CopyBackwards 494411.110us 32771.393us 2 988822.220us 65542.786us | |
mean 47.419us 431.107us 1 47.419us 431.107us | |
mul 86.589us 1037.994us 3 259.767us 3113.983us | |
torch::autograd::GraphRoot 8.927us 1.022us 1 8.927us 1.022us | |
PowBackward0 278.924us 3159.042us 1 278.924us 3159.042us | |
cudnn_convolution 761.547us 48416.495us 4 3046.188us 193665.981us | |
div 25.618us 446.564us 1 25.618us 446.564us | |
AddBackward0 2.219us 1.556us 2 4.438us 3.113us | |
pow 18.560us 918.015us 2 37.120us 1836.029us | |
item 69.771us 68.665us 1 69.771us 68.665us | |
neg 19.707us 903.168us 1 19.707us 903.168us | |
conv3d 805.214us 48440.293us 4 3220.855us 193761.172us | |
_local_scalar_dense 57.678us 57.861us 1 57.678us 57.861us | |
CudnnConvolutionBackward 147.016us 174444.302us 4 588.063us 697777.206us | |
zero_ 12.844us 12.984us 4 51.377us 51.936us | |
elu_backward 14.417us 3940.353us 2 28.834us 7880.707us | |
addcdiv_ 25.941us 18.753us 4 103.764us 75.012us | |
expand 6.392us 2.045us 1 6.392us 2.045us | |
empty 9.033us 6.198us 6 54.200us 37.186us | |
MeanBackward1 43.692us 452.606us 1 43.692us 452.606us | |
SubBackward0 45.746us 1807.358us 1 45.746us 1807.358us | |
_convolution 790.243us 48431.719us 4 3160.973us 193726.874us | |
contiguous 4.008us 2.871us 4 16.031us 11.486us | |
addcmul_ 30.222us 61.081us 4 120.887us 244.324us | |
sqrt 29.069us 16.129us 4 116.278us 64.514us | |
sub 18.314us 1302.536us 1 18.314us 1302.536us | |
Iter 3/5, loss: 1.1756300926208496 | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
Name CPU time CUDA time Calls CPU total CUDA total | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
convolution 116.248us 46487.119us 4 464.994us 185948.476us | |
add 22.570us 3933.716us 4 90.280us 15734.863us | |
torch::autograd::AccumulateGrad 8842.036us 8837.311us 6 53052.215us 53023.865us | |
is_floating_point 4.669us 4.272us 1 4.669us 4.272us | |
cudnn_convolution_backward 73.899us 173217.297us 4 295.595us 692869.186us | |
EluBackward 21.530us 3931.137us 2 43.060us 7862.274us | |
mul_ 19.465us 18.654us 8 155.722us 149.231us | |
elu_ 23.519us 2696.190us 2 47.038us 5392.380us | |
detach_ 4.203us 4.032us 4 16.811us 16.128us | |
add_ 24.377us 27.507us 12 292.529us 330.078us | |
torch::autograd::CopyBackwards 487694.991us 30620.026us 2 975389.982us 61240.051us | |
mean 64.531us 436.234us 1 64.531us 436.234us | |
mul 18.071us 1031.509us 3 54.213us 3094.528us | |
torch::autograd::GraphRoot 5.049us 2.045us 1 5.049us 2.045us | |
PowBackward0 80.541us 3103.745us 1 80.541us 3103.745us | |
cudnn_convolution 77.646us 46466.238us 4 310.583us 185864.951us | |
div 26.891us 447.479us 1 26.891us 447.479us | |
AddBackward0 2.364us 1.038us 2 4.728us 2.075us | |
pow 20.969us 904.190us 2 41.938us 1808.380us | |
item 57.548us 57.190us 1 57.548us 57.190us | |
neg 14.888us 895.996us 1 14.888us 895.996us | |
conv3d 125.055us 46490.504us 4 500.218us 185962.015us | |
_local_scalar_dense 47.980us 47.302us 1 47.980us 47.302us | |
CudnnConvolutionBackward 80.609us 173220.341us 4 322.436us 692881.363us | |
zero_ 13.869us 13.984us 4 55.474us 55.936us | |
elu_backward 15.629us 3929.077us 2 31.258us 7858.154us | |
addcdiv_ 20.206us 20.645us 4 80.822us 82.581us | |
expand 6.933us 2.060us 1 6.933us 2.060us | |
empty 10.876us 6.186us 6 65.253us 37.117us | |
MeanBackward1 46.186us 452.606us 1 46.186us 452.606us | |
SubBackward0 38.623us 1795.059us 1 38.623us 1795.059us | |
_convolution 108.145us 46482.273us 4 432.581us 185929.090us | |
contiguous 4.421us 2.145us 4 17.683us 8.579us | |
addcmul_ 22.678us 23.300us 4 90.711us 93.201us | |
sqrt 28.191us 28.915us 4 112.762us 115.662us | |
sub 26.008us 1310.715us 1 26.008us 1310.715us | |
Iter 4/5, loss: 1.1753698587417603 | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
Name CPU time CUDA time Calls CPU total CUDA total | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
convolution 116.651us 46888.871us 4 466.606us 187555.486us | |
add 49.208us 3926.529us 4 196.831us 15706.116us | |
torch::autograd::AccumulateGrad 8926.092us 8920.151us 6 53556.550us 53520.905us | |
is_floating_point 6.072us 5.798us 1 6.072us 5.798us | |
cudnn_convolution_backward 70.928us 173593.330us 4 283.713us 694373.322us | |
EluBackward 23.459us 3928.055us 2 46.918us 7856.110us | |
mul_ 22.554us 21.828us 8 180.431us 174.622us | |
elu_ 21.129us 2700.798us 2 42.259us 5401.596us | |
detach_ 3.642us 3.496us 4 14.567us 13.984us | |
add_ 27.942us 31.041us 12 335.300us 372.498us | |
torch::autograd::CopyBackwards 488527.980us 30531.403us 2 977055.960us 61062.805us | |
mean 61.966us 428.024us 1 61.966us 428.024us | |
mul 17.196us 1044.484us 3 51.588us 3133.453us | |
torch::autograd::GraphRoot 5.090us 1.038us 1 5.090us 1.038us | |
PowBackward0 73.228us 3133.438us 1 73.228us 3133.438us | |
cudnn_convolution 71.765us 46870.141us 4 287.059us 187480.566us | |
div 37.220us 446.472us 1 37.220us 446.472us | |
AddBackward0 2.340us 2.045us 2 4.679us 4.089us | |
pow 20.854us 895.996us 2 41.708us 1791.992us | |
item 60.704us 59.998us 1 60.704us 59.998us | |
neg 14.828us 910.629us 1 14.828us 910.629us | |
conv3d 125.385us 46892.571us 4 501.542us 187570.285us | |
_local_scalar_dense 48.712us 47.974us 1 48.712us 47.974us | |
CudnnConvolutionBackward 79.269us 173596.680us 4 317.075us 694386.719us | |
zero_ 11.850us 11.992us 4 47.400us 47.968us | |
elu_backward 17.413us 3924.988us 2 34.826us 7849.976us | |
addcdiv_ 24.158us 25.864us 4 96.631us 103.455us | |
expand 7.293us 2.045us 1 7.293us 2.045us | |
empty 10.386us 6.703us 6 62.317us 40.219us | |
MeanBackward1 56.777us 451.584us 1 56.777us 451.584us | |
SubBackward0 39.133us 1815.552us 1 39.133us 1815.552us | |
_convolution 109.027us 46884.776us 4 436.110us 187539.105us | |
contiguous 8.406us 2.598us 4 33.624us 10.392us | |
addcmul_ 24.448us 24.857us 4 97.792us 99.426us | |
sqrt 31.066us 32.166us 4 124.263us 128.662us | |
sub 26.490us 1307.648us 1 26.490us 1307.648us | |
Iter 5/5, loss: 1.175110101699829 | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
Name CPU time CUDA time Calls CPU total CUDA total | |
----------------------------------- --------------- --------------- --------------- --------------- --------------- | |
convolution 85.994us 47133.496us 4 343.976us 188533.985us | |
add 24.927us 3920.383us 4 99.708us 15681.534us | |
torch::autograd::AccumulateGrad 8494.010us 8483.210us 6 50964.062us 50899.261us | |
is_floating_point 6.081us 6.104us 1 6.081us 6.104us | |
cudnn_convolution_backward 125.165us 173178.101us 4 500.661us 692712.402us | |
EluBackward 37.551us 3927.551us 2 75.102us 7855.103us | |
mul_ 23.331us 23.003us 8 186.650us 184.021us | |
elu_ 17.918us 2693.626us 2 35.837us 5387.253us | |
detach_ 3.326us 3.208us 4 13.305us 12.832us | |
add_ 27.060us 29.989us 12 324.719us 359.863us | |
torch::autograd::CopyBackwards 488364.907us 29978.973us 2 976729.813us 59957.947us | |
mean 53.360us 431.107us 1 53.360us 431.107us | |
mul 28.347us 1118.891us 3 85.040us 3356.674us | |
torch::autograd::GraphRoot 6.743us 1.022us 1 6.743us 1.022us | |
PowBackward0 127.039us 3360.764us 1 127.039us 3360.764us | |
cudnn_convolution 58.665us 47116.391us 4 234.661us 188465.565us | |
div 88.406us 447.739us 1 88.406us 447.739us | |
AddBackward0 3.687us 1.038us 2 7.373us 2.075us | |
pow 29.014us 900.612us 2 58.029us 1801.224us | |
item 67.256us 66.772us 1 67.256us 66.772us | |
neg 23.304us 1208.313us 1 23.304us 1208.313us | |
conv3d 92.206us 47137.258us 4 368.822us 188549.031us | |
_local_scalar_dense 49.332us 49.072us 1 49.332us 49.072us | |
CudnnConvolutionBackward 138.998us 173181.194us 4 555.993us 692724.777us | |
zero_ 10.750us 10.920us 4 43.001us 43.680us | |
elu_backward 26.529us 3923.965us 2 53.059us 7847.931us | |
addcdiv_ 24.982us 26.031us 4 99.928us 104.126us | |
expand 15.599us 2.045us 1 15.599us 2.045us | |
empty 9.832us 5.973us 6 58.991us 35.836us | |
MeanBackward1 123.502us 453.629us 1 123.502us 453.629us | |
SubBackward0 62.267us 2116.623us 1 62.267us 2116.623us | |
_convolution 80.326us 47129.827us 4 321.302us 188519.307us | |
contiguous 3.246us 2.174us 4 12.985us 8.697us | |
addcmul_ 24.637us 25.116us 4 98.547us 100.464us | |
sqrt 29.778us 30.640us 4 119.112us 122.559us | |
sub 21.421us 1339.386us 1 21.421us 1339.386us |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
import copy | |
class ConvBlock3d(nn.Module):
    """Residual 3D block: ``ELU(Conv3d(X)) + X``.

    The block applies a bias-free 3x3x3 convolution (padding 1) followed by
    an in-place ELU, then adds the untouched input back onto the result.
    There is no normalisation layer.

    Because of the residual addition, the convolution output has to be
    broadcast-compatible with the input; in practice that means
    ``in_channels == out_channels``.

    Args:
        in_channels: number of channels of the input volume.
        out_channels: number of channels produced by the convolution.
    """

    def __init__(self, in_channels, out_channels):
        super(ConvBlock3d, self).__init__()
        # The attribute name and layer order fix the state-dict key
        # ("conv.0.weight"), so both are kept stable.
        conv_layer = nn.Conv3d(
            in_channels, out_channels, kernel_size=3, padding=1, bias=False
        )
        activation = nn.ELU(inplace=True)
        self.conv = nn.Sequential(conv_layer, activation)

    def forward(self, X):
        """Return the activated convolution of ``X`` plus ``X`` itself."""
        # The in-place ELU only overwrites the fresh conv output, never X.
        return self.conv(X) + X
class Encoder(nn.Module):
    """Stem convolution followed by two residual ``ConvBlock3d`` stages.

    The stem is a bias-free 5x5x5 convolution with stride 1 and padding 2
    that maps ``input_channels`` to ``nb_f_maps`` channels. It is followed by
    two ``ConvBlock3d(nb_f_maps, nb_f_maps)`` blocks.

    Args:
        input_channels: number of channels of the input volume.
        nb_f_maps: number of feature maps used throughout the encoder.
    """

    def __init__(self, input_channels, nb_f_maps=6):
        super(Encoder, self).__init__()
        # Layers are built in execution order (stem first) so that parameter
        # initialisation and the Sequential indices 0, 1, 2 stay unchanged.
        stem = nn.Conv3d(
            input_channels,
            nb_f_maps,
            kernel_size=5,
            stride=1,
            padding=2,
            bias=False,
        )
        residual_stages = [ConvBlock3d(nb_f_maps, nb_f_maps) for _ in range(2)]
        self.inp = nn.Sequential(stem, *residual_stages)

    def forward(self, X):
        """Run ``X`` through the stem and both residual stages."""
        features = self.inp(X)
        return features
class AE(nn.Module):
    """Encoder followed by a 1x1x1 convolution producing ``nb_classes`` maps.

    Args:
        input_channels: number of channels of the input volume.
        nb_classes: number of output channels of the final convolution.
        nb_f_maps: number of feature maps produced by the encoder.
    """

    def __init__(self, input_channels, nb_classes, nb_f_maps=6):
        super(AE, self).__init__()
        self.encoder = Encoder(input_channels, nb_f_maps)
        # Bias-free pointwise convolution mapping the encoder features to
        # the requested number of output channels.
        self.conv = nn.Conv3d(nb_f_maps, nb_classes, kernel_size=1, bias=False)

    def forward(self, X):
        """Encode ``X`` and project the features with the final convolution."""
        encoded = self.encoder(X)
        return self.conv(encoded)
# Enable cuDNN benchmark mode (its convolution auto-tuner) for this process.
torch.backends.cudnn.benchmark = True


def profile_training(model, optimizer, shape, n_iters=5, device="cuda"):
    """Run ``n_iters`` profiled optimisation steps on random data.

    A random input volume and a random target of the given ``shape`` are
    created once. Each iteration then performs one optimisation step on the
    mean squared error between ``model(volume)`` and ``target`` inside the
    autograd profiler. The loss is printed per iteration, followed by the
    profiler's per-operator averages.

    Args:
        model: module to train; it must already live on ``device``.
        optimizer: optimizer bound to ``model``'s parameters.
        shape: shape of the input volume and of the target.
        n_iters: number of profiled iterations to run.
        device: device the data is moved to; CUDA timings are only collected
            when this is a CUDA device.

    Returns:
        A list with the loss value (Python float) of every iteration.
    """
    device = torch.device(device)

    # NOTE(review): both tensors are created on the CPU with
    # requires_grad=True and then moved, so backward() also produces
    # gradients for them (the recorded profiles show two CopyBackwards calls
    # per iteration). This is presumably unintended, but it is kept as-is so
    # that timings remain comparable with the recorded output -- confirm
    # before changing.
    volume = torch.randn(*shape, requires_grad=True).to(device)
    target = torch.randn(*shape, requires_grad=True).to(device)

    use_cuda = device.type == "cuda"
    losses = []
    for i in range(n_iters):
        with torch.autograd.profiler.profile(use_cuda=use_cuda) as prof:
            optimizer.zero_grad()
            res = model(volume)
            loss = (res - target).pow(2).mean()
            loss.backward()
            optimizer.step()
            # .item() stays inside the profiled region, as in the recorded
            # profiles (they contain an "item" row).
            loss_value = loss.item()
            print("Iter {}/{}, loss: {}".format(i + 1, n_iters, loss_value))
        # The averages are printed after the profiler context has exited.
        print(prof.key_averages())
        losses.append(loss_value)
    return losses


if __name__ == "__main__":
    model = AE(2, 2)
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-5)
    # The same model and optimizer are reused for both input sizes.
    for shape in ((1, 2, 256, 256, 256), (1, 2, 224, 224, 224)):
        # Banner separating the runs, as seen in the recorded output.
        print("{0} TENSOR SIZE {1} {0}".format("*" * 38, shape))
        profile_training(model, optimizer, shape)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment