Created
February 3, 2021 09:19
-
-
Save ilia-cher/1fa37ad4a5149fb5eeda1245c5583c69 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(pytorch) iliacher@devgpu083:~/local/pytorch (activities_default)$ python test_resnet50.py | |
Files already downloaded and verified | |
step:0 | |
step:1 | |
step:2 | |
step:3 | |
step:4 | |
step:5 | |
step:6 | |
step:7 | |
step:8 | |
step:9 | |
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ | |
Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls | |
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ | |
aten::thnn_conv2d_backward 8.25% 322.645ms 31.37% 1.226s 3.856ms 1.495s 61.17% 1.496s 4.705ms 318 | |
aten::thnn_conv2d_forward 6.14% 239.948ms 13.05% 509.987ms 1.604ms 574.360ms 23.50% 574.360ms 1.806ms 318 | |
void at::native::col2im_kernel<float, float>(long, f... 0.00% 0.000us 0.00% 0.000us 0.000us 509.336ms 20.84% 509.336ms 50.474us 10091 | |
sgemm_32x32x32_NT_vec 0.00% 0.000us 0.00% 0.000us 0.000us 348.843ms 14.27% 348.843ms 50.896us 6854 | |
sgemm_32x32x32_NN_vec 0.00% 0.000us 0.00% 0.000us 0.000us 266.423ms 10.90% 266.423ms 35.552us 7494 | |
void at::native::im2col_kernel<float>(long, float co... 0.00% 0.000us 0.00% 0.000us 0.000us 246.419ms 10.08% 246.419ms 37.382us 6592 | |
sgemm_32x32x32_TN_vec 0.00% 0.000us 0.00% 0.000us 0.000us 184.302ms 7.54% 184.302ms 28.748us 6411 | |
sgemm_32x32x32_NN 0.00% 0.000us 0.00% 0.000us 0.000us 151.835ms 6.21% 151.835ms 87.867us 1728 | |
maxwell_sgemm_128x64_tn 0.00% 0.000us 0.00% 0.000us 0.000us 106.377ms 4.35% 106.377ms 42.619us 2496 | |
sgemm_32x32x32_TN 0.00% 0.000us 0.00% 0.000us 0.000us 106.306ms 4.35% 106.306ms 61.520us 1728 | |
void at::native::batch_norm_backward_kernel<float, f... 0.00% 0.000us 0.00% 0.000us 0.000us 99.966ms 4.09% 99.966ms 310.453us 322 | |
aten::native_batch_norm_backward 0.28% 10.982ms 0.76% 29.813ms 93.752us 96.933ms 3.97% 96.933ms 304.821us 318 | |
aten::native_batch_norm 0.42% 16.396ms 0.88% 34.254ms 107.717us 72.668ms 2.97% 72.668ms 228.516us 318 | |
maxwell_sgemm_128x64_nt 0.00% 0.000us 0.00% 0.000us 0.000us 72.624ms 2.97% 72.624ms 37.091us 1958 | |
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 68.091ms 2.79% 68.091ms 115.213us 591 | |
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 59.078ms 2.42% 59.078ms 17.244us 3426 | |
sgemm_128x128x8_NT 0.00% 0.000us 0.00% 0.000us 0.000us 49.223ms 2.01% 49.223ms 64.092us 768 | |
aten::threshold_backward 0.17% 6.594ms 0.32% 12.576ms 42.776us 39.858ms 1.63% 39.858ms 135.571us 294 | |
void at::native::batch_norm_collect_statistics_kerne... 0.00% 0.000us 0.00% 0.000us 0.000us 38.091ms 1.56% 38.091ms 119.783us 318 | |
void at::native::batch_norm_transform_input_kernel<f... 0.00% 0.000us 0.00% 0.000us 0.000us 34.577ms 1.41% 34.577ms 108.733us 318 | |
aten::add_ 1.15% 44.860ms 5.02% 196.377ms 65.590us 34.567ms 1.41% 34.567ms 11.545us 2994 | |
aten::threshold_ 0.10% 3.749ms 0.20% 7.971ms 27.112us 27.406ms 1.12% 27.406ms 93.218us 294 | |
maxwell_sgemm_128x64_nn 0.00% 0.000us 0.00% 0.000us 0.000us 24.644ms 1.01% 24.644ms 25.671us 960 | |
void sgemm_largek_lds64<true, false, 5, 5, 4, 4, 4, ... 0.00% 0.000us 0.00% 0.000us 0.000us 21.423ms 0.88% 21.423ms 95.638us 224 | |
Memcpy HtoD (Pageable -> Device) 0.00% 0.000us 0.00% 0.000us 0.000us 18.097ms 0.74% 18.097ms 76.682us 236 | |
aten::copy_ 0.69% 27.042ms 1.24% 48.288ms 82.122us 17.864ms 0.73% 17.864ms 30.381us 588 | |
void at::native::(anonymous namespace)::max_pool_bac... 0.00% 0.000us 0.00% 0.000us 0.000us 13.865ms 0.57% 13.865ms 1.981ms 7 | |
aten::max_pool2d_with_indices_backward 0.01% 197.000us 0.02% 684.000us 114.000us 11.886ms 0.49% 12.914ms 2.152ms 6 | |
Memset (Device) 0.00% 0.000us 0.00% 0.000us 0.000us 11.047ms 0.45% 11.047ms 1.085us 10182 | |
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 4.074ms 0.17% 4.074ms 3.131us 1301 | |
aten::fill_ 0.26% 10.354ms 0.63% 24.759ms 19.104us 3.898ms 0.16% 3.898ms 3.008us 1296 | |
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 3.512ms 0.14% 3.512ms 3.116us 1127 | |
aten::mul_ 0.41% 15.957ms 1.78% 69.576ms 72.025us 3.011ms 0.12% 3.011ms 3.117us 966 | |
aten::max_pool2d_with_indices 0.01% 316.000us 0.01% 558.000us 93.000us 2.965ms 0.12% 2.965ms 494.167us 6 | |
void at::native::(anonymous namespace)::max_pool_for... 0.00% 0.000us 0.00% 0.000us 0.000us 2.965ms 0.12% 2.965ms 494.167us 6 [37/1909] | |
aten::mean 0.01% 261.000us 0.01% 419.000us 69.833us 695.000us 0.03% 695.000us 115.833us 6 | |
void at::native::reduce_kernel<512, 1, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 695.000us 0.03% 695.000us 115.833us 6 | |
void scal_kernel<float, float, 1, false, 6, 5, 5, 3>... 0.00% 0.000us 0.00% 0.000us 0.000us 643.000us 0.03% 643.000us 2.871us 224 | |
aten::mm 0.01% 330.000us 0.01% 583.000us 48.583us 482.000us 0.02% 482.000us 40.167us 12 | |
aten::div 0.52% 20.235ms 0.61% 23.698ms 119.687us 456.000us 0.02% 456.000us 2.303us 198 | |
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 456.000us 0.02% 456.000us 76.000us 6 | |
aten::addmm 0.01% 431.000us 0.02% 762.000us 127.000us 450.000us 0.02% 450.000us 75.000us 6 | |
void kernelPointwiseApply1<TensorFillOp<float>, floa... 0.00% 0.000us 0.00% 0.000us 0.000us 400.000us 0.02% 400.000us 1.238us 323 | |
aten::add 0.21% 8.018ms 0.34% 13.350ms 41.981us 391.000us 0.02% 391.000us 1.230us 318 | |
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 391.000us 0.02% 391.000us 1.230us 318 | |
aten::_log_softmax 0.00% 171.000us 0.01% 418.000us 69.667us 94.000us 0.00% 94.000us 15.667us 6 | |
void (anonymous namespace)::softmax_warp_forward<flo... 0.00% 0.000us 0.00% 0.000us 0.000us 94.000us 0.00% 94.000us 15.667us 6 | |
void at::native::reduce_kernel<128, 4, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 84.000us 0.00% 84.000us 14.000us 6 | |
aten::_log_softmax_backward_data 0.00% 170.000us 0.01% 462.000us 77.000us 83.000us 0.00% 83.000us 13.833us 6 | |
void (anonymous namespace)::softmax_warp_backward<fl... 0.00% 0.000us 0.00% 0.000us 0.000us 83.000us 0.00% 83.000us 13.833us 6 | |
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 55.000us 0.00% 55.000us 9.167us 6 | |
aten::nll_loss_forward 0.01% 280.000us 0.01% 384.000us 64.000us 34.000us 0.00% 34.000us 5.667us 6 | |
void cunn_ClassNLLCriterion_updateOutput_kernel<floa... 0.00% 0.000us 0.00% 0.000us 0.000us 34.000us 0.00% 34.000us 5.667us 6 | |
aten::nll_loss_backward 0.01% 233.000us 0.01% 494.000us 82.333us 25.000us 0.00% 25.000us 4.167us 6 | |
void cunn_ClassNLLCriterion_updateGradInput_kernel<f... 0.00% 0.000us 0.00% 0.000us 0.000us 19.000us 0.00% 19.000us 3.167us 6 | |
aten::zeros 0.01% 510.000us 0.02% 650.000us 27.083us 0.000us 0.00% 0.000us 0.000us 24 | |
aten::empty 0.48% 18.886ms 0.48% 18.886ms 4.911us 0.000us 0.00% 0.000us 0.000us 3846 | |
aten::zero_ 0.30% 11.740ms 0.93% 36.334ms 27.651us 0.000us 0.00% 3.892ms 2.962us 1314 | |
ProfilerStep* 36.30% 1.419s 65.70% 2.568s 428.021ms 0.000us 0.00% 732.149ms 122.025ms 6 | |
enumerate(DataLoader)#_SingleProcessDataLoaderIter._... 4.00% 156.369ms 6.31% 246.581ms 41.097ms 0.000us 0.00% 0.000us 0.000us 6 | |
aten::set_ 4.10% 160.433ms 4.10% 160.433ms 2.636us 0.000us 0.00% 0.000us 0.000us 60864 | |
aten::view 0.19% 7.597ms 0.19% 7.597ms 2.771us 0.000us 0.00% 0.000us 0.000us 2742 | |
aten::permute 0.19% 7.507ms 0.20% 7.850ms 40.885us 0.000us 0.00% 0.000us 0.000us 192 | |
aten::as_strided 0.02% 674.000us 0.02% 674.000us 1.560us 0.000us 0.00% 0.000us 0.000us 432 | |
aten::contiguous 0.03% 1.284ms 0.41% 15.970ms 83.177us 0.000us 0.00% 0.000us 0.000us 192 | |
aten::empty_like 0.18% 6.964ms 0.39% 15.097ms 10.146us 0.000us 0.00% 0.000us 0.000us 1488 | |
aten::to 0.08% 3.213ms 1.02% 39.888ms 97.765us 0.000us 0.00% 17.864ms 43.784us 408 | |
aten::empty_strided 0.05% 1.808ms 0.05% 1.808ms 4.498us 0.000us 0.00% 0.000us 0.000us 402 | |
aten::stack 0.01% 472.000us 0.60% 23.545ms 3.924ms 0.000us 0.00% 0.000us 0.000us 6 | |
aten::unsqueeze 0.02% 682.000us 0.02% 910.000us 4.740us 0.000us 0.00% 0.000us 0.000us 192 | |
aten::cat 0.00% 66.000us 0.57% 22.163ms 3.694ms 0.000us 0.00% 0.000us 0.000us 6 | |
aten::_cat 0.56% 21.916ms 0.57% 22.097ms 3.683ms 0.000us 0.00% 0.000us 0.000us 6 | |
aten::resize_ 0.06% 2.271ms 0.06% 2.271ms 6.526us 0.000us 0.00% 0.000us 0.000us 348 | |
aten::narrow 0.00% 29.000us 0.00% 74.000us 12.333us 0.000us 0.00% 0.000us 0.000us 6 | |
aten::slice 0.00% 35.000us 0.00% 45.000us 7.500us 0.000us 0.00% 0.000us 0.000us 6 | |
aten::detach_ 0.00% 23.000us 0.00% 38.000us 6.333us 0.000us 0.00% 0.000us 0.000us 6 | |
detach_ 0.00% 15.000us 0.00% 15.000us 2.500us 0.000us 0.00% 0.000us 0.000us 6 | |
cudaMemcpyAsync 1.02% 39.746ms 1.02% 39.746ms 194.833us 0.000us 0.00% 0.000us 0.000us 204 | |
cudaStreamSynchronize 0.01% 461.000us 0.01% 461.000us 38.417us 0.000us 0.00% 0.000us 0.000us 12 | |
aten::conv2d 0.08% 3.054ms 13.46% 526.234ms 1.655ms 0.000us 0.00% 574.360ms 1.806ms 318 | |
aten::convolution 0.07% 2.805ms 13.38% 523.180ms 1.645ms 0.000us 0.00% 574.360ms 1.806ms 318 | |
aten::_convolution 0.10% 3.858ms 13.31% 520.375ms 1.636ms 0.000us 0.00% 574.360ms 1.806ms 318 | |
aten::_convolution_nogroup 0.09% 3.620ms 13.21% 516.517ms 1.624ms 0.000us 0.00% 574.360ms 1.806ms 318 | |
aten::_nnpack_available 0.01% 260.000us 0.01% 260.000us 0.818us 0.000us 0.00% 0.000us 0.000us 318 | |
aten::thnn_conv2d 0.07% 2.650ms 13.11% 512.637ms 1.612ms 0.000us 0.00% 574.360ms 1.806ms 318 | |
cudaMemsetAsync 2.88% 112.451ms 2.88% 112.451ms 11.044us 0.000us 0.00% 0.000us 0.000us 10182 | |
cudaLaunchKernel 28.59% 1.118s 28.59% 1.118s 20.453us 0.000us 0.00% 0.000us 0.000us 54648 | |
aten::batch_norm 0.08% 2.978ms 1.13% 44.213ms 139.035us 0.000us 0.00% 72.668ms 228.516us 318 | |
aten::_batch_norm_impl_index 0.14% 5.408ms 1.05% 41.235ms 129.670us 0.000us 0.00% 72.668ms 228.516us 318 | |
aten::reshape 0.11% 4.326ms 0.17% 6.680ms 6.958us 0.000us 0.00% 0.000us 0.000us 960 | |
aten::relu_ 0.15% 5.774ms 0.35% 13.745ms 46.752us 0.000us 0.00% 27.406ms 93.218us 294 | |
aten::max_pool2d 0.00% 82.000us 0.02% 640.000us 106.667us 0.000us 0.00% 2.965ms 494.167us 6 | |
aten::adaptive_avg_pool2d 0.00% 79.000us 0.01% 498.000us 83.000us 0.000us 0.00% 695.000us 115.833us 6 | |
aten::flatten 0.00% 46.000us 0.00% 141.000us 23.500us 0.000us 0.00% 0.000us 0.000us 6 | |
aten::t 0.01% 275.000us 0.01% 575.000us 19.167us 0.000us 0.00% 0.000us 0.000us 30 | |
aten::transpose 0.01% 226.000us 0.01% 300.000us 10.000us 0.000us 0.00% 0.000us 0.000us 30 | |
aten::expand 0.00% 86.000us 0.00% 105.000us 8.750us 0.000us 0.00% 0.000us 0.000us 12 | |
aten::log_softmax 0.00% 61.000us 0.01% 479.000us 79.833us 0.000us 0.00% 94.000us 15.667us 6 | |
aten::nll_loss 0.00% 47.000us 0.01% 431.000us 71.833us 0.000us 0.00% 34.000us 5.667us 6 | |
Optimizer.zero_grad#SGD.zero_grad 0.24% 9.235ms 0.91% 35.664ms 5.944ms 0.000us 0.00% 1.808ms 301.333us 6 | |
aten::ones_like 0.00% 90.000us 0.01% 368.000us 61.333us 0.000us 0.00% 6.000us 1.000us 6 | |
NllLossBackward 0.00% 144.000us 0.02% 638.000us 106.333us 0.000us 0.00% 25.000us 4.167us 6 | |
LogSoftmaxBackward 0.00% 98.000us 0.01% 560.000us 93.333us 0.000us 0.00% 83.000us 13.833us 6 | |
AddmmBackward 0.00% 187.000us 0.03% 1.109ms 184.833us 0.000us 0.00% 482.000us 80.333us 6 | |
aten::conj 0.00% 41.000us 0.00% 41.000us 3.417us 0.000us 0.00% 0.000us 0.000us 12 | |
torch::autograd::AccumulateGrad 0.15% 5.680ms 1.36% 53.090ms 54.959us 0.000us 0.00% 4.170ms 4.317us 966 | |
TBackward 0.00% 21.000us 0.00% 124.000us 20.667us 0.000us 0.00% 0.000us 0.000us 6 | |
ViewBackward 0.00% 24.000us 0.00% 98.000us 16.333us 0.000us 0.00% 0.000us 0.000us 6 | |
MeanBackward1 0.00% 65.000us 0.01% 495.000us 82.500us 0.000us 0.00% 456.000us 76.000us 6 | |
ReluBackward1 0.06% 2.240ms 0.38% 14.816ms 50.395us 0.000us 0.00% 39.858ms 135.571us 294 | |
AddBackward0 0.01% 252.000us 0.01% 252.000us 2.625us 0.000us 0.00% 0.000us 0.000us 96 | |
NativeBatchNormBackward 0.11% 4.314ms 0.87% 34.127ms 107.318us 0.000us 0.00% 96.933ms 304.821us 318 | |
ThnnConv2DBackward 0.11% 4.462ms 31.48% 1.231s 3.870ms 0.000us 0.00% 1.496s 4.705ms 318 | |
MaxPool2DWithIndicesBackward 0.00% 75.000us 0.02% 759.000us 126.500us 0.000us 0.00% 12.914ms 2.152ms 6 | |
aten::zeros_like 0.00% 44.000us 0.01% 300.000us 50.000us 0.000us 0.00% 1.028ms 171.333us 6 | |
aten::resize_as_ 0.00% 79.000us 0.00% 87.000us 14.500us 0.000us 0.00% 0.000us 0.000us 6 | |
cudaEventQuery 0.01% 258.000us 0.01% 258.000us 1.344us 0.000us 0.00% 0.000us 0.000us 192 | |
cudaEventRecord 0.00% 183.000us 0.00% 183.000us 0.953us 0.000us 0.00% 0.000us 0.000us 192 | |
Optimizer.step#SGD.step 0.64% 24.981ms 6.13% 239.797ms 39.966ms 0.000us 0.00% 10.482ms 1.747ms 6 | |
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ | |
Self CPU time total: 3.909s | |
Self CUDA time total: 2.444s |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment