@ljaljushkin
Last active January 12, 2023 14:20
The full reproducer, GDB stack trace, and profiler logs for different batch sizes.
import torch
from torch import nn
from torch.optim import SGD
from torch.utils import data
from torch.utils.data import DataLoader


class UNet(nn.Module):
    def __init__(
        self,
        in_channels=3,
        n_classes=2,
        depth=1,
        wf=1,
        padding=True
    ):
        super().__init__()
        self.padding = padding
        self.depth = depth
        prev_channels = in_channels
        self.down_path = nn.ModuleList()
        for i in range(depth):
            self.down_path.append(
                UNetConvBlock(prev_channels, 2 ** (wf + i), padding)
            )
            prev_channels = 2 ** (wf + i)
        self.last = nn.Conv2d(prev_channels, n_classes, kernel_size=1)

    def forward(self, x):
        for i, down in enumerate(self.down_path):
            x = down(x)
        x = self.last(x)
        return x


class UNetConvBlock(nn.Module):
    def __init__(self, in_size, out_size, padding):
        super().__init__()
        self.block = nn.Conv2d(in_size, out_size, kernel_size=3, padding=int(padding))

    def forward(self, x):
        out = self.block(x)
        return out


class MockDataset(data.Dataset):
    def __init__(self):
        super().__init__()

    def __len__(self):
        return 6

    def __getitem__(self, idx):
        image = torch.rand((3, 23, 30))
        target = torch.randint(0, 1, (23, 30))
        return image, target


def main():
    train_set = MockDataset()
    train_loader = DataLoader(train_set, batch_size=4, num_workers=1, drop_last=True)
    model = UNet(n_classes=13)
    print(model)
    device = 'cpu'
    model.to(device)
    optimizer = SGD(model.parameters(), lr=1e-3)
    for epoch in range(1):
        model.train()
        for step, batch_data in enumerate(train_loader):
            inputs = batch_data[0].to(device)
            labels = batch_data[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            print(outputs.shape, labels.shape)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            print(f"\nBefore HANG {loss}\n")
            loss.backward()
            print("\nAFTER HANG\n")


if __name__ == '__main__':
    main()
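The profiler tables further below were evidently collected with the PyTorch profiler (the ActivityProfilerController stage messages and the per-shape CPU tables come from it), but the reproducer above does not show that part. A minimal sketch of how such a table could be produced, assuming the reproducer's imports and the standard torch.profiler API (the helper name profiled_step is made up for illustration):

from torch.profiler import profile, ProfilerActivity

def profiled_step(model, optimizer, inputs, labels):
    # Profile a single training step on CPU, recording input shapes so the
    # report can be grouped the same way as the tables in this gist.
    with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        loss.backward()
    print(prof.key_averages(group_by_input_shape=True).table(sort_by="cpu_time_total"))
    return loss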
$ MKLDNN_VERBOSE=2 python3 examples/torch/semantic_segmentation/main.py
UNet(
(down_path): ModuleList(
(0): UNetConvBlock(
(block): Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
(last): Conv2d(2, 13, kernel_size=(1, 1), stride=(1, 1))
)
onednn_verbose,info,oneDNN v2.6.0 (commit 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
onednn_verbose,info,cpu,runtime:OpenMP,nthr:18
onednn_verbose,info,cpu,isa:Intel AVX-512 with Intel DL Boost
onednn_verbose,info,gpu,runtime:none
onednn_verbose,info,prim_template:operation,engine,primitive,implementation,prop_kind,memory_descriptors,attributes,auxiliary,problem_desc,exec_time
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:Acdb16a:f0,attr-scratchpad:user ,,2x3x3x3,0.197998
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:Acdb16a:f0,attr-scratchpad:user ,,2x3x3x3,0.0012207
onednn_verbose,create:cache_miss,cpu,convolution,jit:avx512_core,forward_training,src_f32::blocked:abcd:f0 wei_f32:p:blocked:Acdb16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic3oc2_ih23oh23kh3sh1dh0ph1_iw30ow30kw3sw1dw0pw1,0.148926
onednn_verbose,exec,cpu,convolution,jit:avx512_core,forward_training,src_f32::blocked:abcd:f0 wei_f32:p:blocked:Acdb16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic3oc2_ih23oh23kh3sh1dh0ph1_iw30ow30kw3sw1dw0pw1,0.615967
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.193848
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.0161133
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.0319824
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.0310059
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16b16a:f0,attr-scratchpad:user ,,13x2x1x1,0.0319824
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16b16a:f0,attr-scratchpad:user ,,13x2x1x1,0.000976562
onednn_verbose,create:cache_miss,cpu,convolution,jit_1x1:avx512_core,forward_training,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16b16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0908203
onednn_verbose,exec,cpu,convolution,jit_1x1:avx512_core,forward_training,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16b16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0681152
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x13x23x30,0.172852
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x13x23x30,0.0168457
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.0390625
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.032959
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16a16b:f0,attr-scratchpad:user ,,13x2x1x1,0.0319824
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16a16b:f0,attr-scratchpad:user ,,13x2x1x1,0.000976562
onednn_verbose,create:cache_miss,cpu,convolution,jit_1x1:avx512_core,backward_data,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16a16b:f0 bia_undef::undef::f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0720215
onednn_verbose,exec,cpu,convolution,jit_1x1:avx512_core,backward_data,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16a16b:f0 bia_undef::undef::f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0300293
onednn_verbose,create:cache_hit,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.00195312
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.0100098
onednn_verbose,create:cache_hit,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.000976562
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.0109863
onednn_verbose,create:cache_hit,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.000976562
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.0100098
onednn_verbose,create:cache_miss,cpu,convolution,jit_1x1:avx512_core,backward_weights,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16b16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.225098
$ MKLDNN_VERBOSE=2 python3 examples/torch/semantic_segmentation/main.py
UNet(
(down_path): ModuleList(
(0): UNetConvBlock(
(block): Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
(last): Conv2d(2, 13, kernel_size=(1, 1), stride=(1, 1))
)
onednn_verbose,info,oneDNN v2.7.2 (commit fbec3e25a559ee252022ae066817b204e106a6ba)
onednn_verbose,info,cpu,runtime:OpenMP,nthr:18
onednn_verbose,info,cpu,isa:Intel AVX-512 with Intel DL Boost
onednn_verbose,info,gpu,runtime:none
onednn_verbose,info,prim_template:operation,engine,primitive,implementation,prop_kind,memory_descriptors,attributes,auxiliary,problem_desc,exec_time
onednn_verbose,create:cache_miss,cpu,convolution,jit:avx512_core,forward_training,src_f32::blocked:abcd:f0 wei_f32:p:blocked:Acdb16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic3oc2_ih23oh23kh3sh1dh0ph1_iw30ow30kw3sw1dw0pw1,0.143066
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:Acdb16a:f0,attr-scratchpad:user ,,2x3x3x3,0.0349121
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:Acdb16a:f0,attr-scratchpad:user ,,2x3x3x3,0.00219727
onednn_verbose,exec,cpu,convolution,jit:avx512_core,forward_training,src_f32::blocked:abcd:f0 wei_f32:p:blocked:Acdb16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic3oc2_ih23oh23kh3sh1dh0ph1_iw30ow30kw3sw1dw0pw1,0.697998
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.141113
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.0151367
onednn_verbose,create:cache_miss,cpu,convolution,jit_1x1:avx512_core,forward_training,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16b16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0791016
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.0200195
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.0290527
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16b16a:f0,attr-scratchpad:user ,,13x2x1x1,0.0251465
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16b16a:f0,attr-scratchpad:user ,,13x2x1x1,0.000976562
onednn_verbose,exec,cpu,convolution,jit_1x1:avx512_core,forward_training,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16b16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0471191
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x13x23x30,0.138184
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x13x23x30,0.0109863
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.0300293
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.0300293
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16a16b:f0,attr-scratchpad:user ,,13x2x1x1,0.0229492
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16a16b:f0,attr-scratchpad:user ,,13x2x1x1,0
onednn_verbose,create:cache_miss,cpu,convolution,jit_1x1:avx512_core,backward_data,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16a16b:f0 bia_undef::undef::f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0578613
onednn_verbose,exec,cpu,convolution,jit_1x1:avx512_core,backward_data,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16a16b:f0 bia_undef::undef::f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.027832
onednn_verbose,create:cache_hit,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.00195312
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.00878906
onednn_verbose,create:cache_hit,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.0012207
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.00805664
onednn_verbose,create:cache_hit,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.000976562
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.0239258
onednn_verbose,create:cache_miss,cpu,convolution,jit_1x1:avx512_core,backward_weights,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16b16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.156006
UNet(
(down_path): ModuleList(
(0): UNetConvBlock(
(block): Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
(last): Conv2d(2, 13, kernel_size=(1, 1), stride=(1, 1))
)
STAGE:2022-12-30 18:12:36 173859:173859 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
torch.Size([1, 13, 23, 30]) torch.Size([1, 23, 30])
Before HANG 2.654798746109009
AFTER HANG
STAGE:2022-12-30 18:12:37 173859:173859 ActivityProfilerController.cpp:300] Completed Stage: Collection
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ --------------------------------------------------------------------------------
Name Self CPU % Self CPU CPU total % CPU total CPU time avg # of Calls Input Shapes
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ --------------------------------------------------------------------------------
aten::empty 0.04% 54.000us 0.04% 54.000us 2.700us 20 [[], [], [], [], [], []]
aten::random_ 0.02% 26.000us 0.02% 26.000us 26.000us 1 [[], []]
aten::item 0.01% 11.000us 0.02% 22.000us 11.000us 2 [[]]
aten::_local_scalar_dense 0.01% 11.000us 0.01% 11.000us 5.500us 2 [[]]
aten::zeros 0.04% 55.000us 0.05% 69.000us 23.000us 3 [[], [], [], [], []]
aten::zero_ 0.00% 4.000us 0.00% 4.000us 1.333us 3 [[1]]
enumerate(DataLoader)#_MultiProcessingDataLoaderIter... 12.09% 15.623ms 12.12% 15.665ms 7.832ms 2 []
aten::to 0.00% 1.000us 0.00% 1.000us 0.500us 2 [[0], [], [], [], [], []]
aten::lift_fresh 0.00% 4.000us 0.00% 4.000us 2.000us 2 [[0]]
aten::detach_ 0.00% 4.000us 0.00% 6.000us 3.000us 2 [[0]]
detach_ 0.00% 3.000us 0.00% 3.000us 1.500us 2 [[0]]
aten::set_ 0.01% 18.000us 0.01% 18.000us 9.000us 2 [[0], [], [], [], []]
aten::to 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[1, 3, 23, 30], [], [], [], [], [], [], []]
aten::to 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[1, 23, 30], [], [], [], [], [], [], []]
Optimizer.zero_grad#SGD.zero_grad 0.01% 16.000us 0.01% 16.000us 16.000us 1 []
aten::conv2d 0.01% 17.000us 0.96% 1.242ms 1.242ms 1 [[1, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], []]
aten::convolution 0.03% 38.000us 0.95% 1.225ms 1.225ms 1 [[1, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], [], [], []]
aten::_convolution 0.02% 29.000us 0.92% 1.187ms 1.187ms 1 [[1, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], [], [], [], [], [], [], []]
aten::_nnpack_available 0.01% 18.000us 0.01% 18.000us 4.500us 4 []
aten::thnn_conv2d 0.01% 10.000us 0.88% 1.140ms 1.140ms 1 [[1, 3, 23, 30], [2, 3, 3, 3], [], [2], [], []]
aten::_slow_conv2d_forward 0.83% 1.073ms 0.87% 1.130ms 1.130ms 1 [[1, 3, 23, 30], [2, 3, 3, 3], [], [2], [], []]
aten::view 0.00% 6.000us 0.00% 6.000us 3.000us 2 [[2, 3, 3, 3], []]
aten::resize_ 0.01% 11.000us 0.01% 11.000us 1.833us 6 [[0], [], []]
aten::reshape 0.01% 18.000us 0.01% 19.000us 19.000us 1 [[2], []]
aten::_reshape_alias 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2], [], []]
aten::copy_ 0.02% 23.000us 0.02% 23.000us 23.000us 1 [[1, 2, 23, 30], [2, 1, 1], []]
aten::conv2d 0.00% 3.000us 0.05% 65.000us 65.000us 1 [[1, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], []]
aten::convolution 0.01% 11.000us 0.05% 62.000us 62.000us 1 [[1, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], [], [], []]
aten::_convolution 0.01% 8.000us 0.04% 51.000us 51.000us 1 [[1, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], [], [], [], [], [], [], []]
aten::thnn_conv2d 0.00% 2.000us 0.03% 43.000us 43.000us 1 [[1, 2, 23, 30], [13, 2, 1, 1], [], [13], [], []]
aten::_slow_conv2d_forward 0.02% 24.000us 0.03% 41.000us 41.000us 1 [[1, 2, 23, 30], [13, 2, 1, 1], [], [13], [], []]
aten::view 0.00% 4.000us 0.00% 4.000us 1.333us 3 [[13, 2, 1, 1], []]
aten::view 0.00% 1.000us 0.00% 1.000us 0.500us 2 [[1, 2, 23, 30], []]
aten::detach 0.00% 3.000us 0.00% 3.000us 1.500us 2 [[1, 2, 690]]
aten::reshape 0.00% 4.000us 0.00% 4.000us 4.000us 1 [[13], []]
aten::_reshape_alias 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[13], [], []]
aten::copy_ 0.01% 7.000us 0.01% 7.000us 7.000us 1 [[1, 13, 23, 30], [13, 1, 1], []]
aten::cross_entropy_loss 0.02% 32.000us 0.14% 175.000us 175.000us 1 [[1, 13, 23, 30], [1, 23, 30], [], [], [], []]
aten::log_softmax 0.02% 20.000us 0.08% 101.000us 101.000us 1 [[1, 13, 23, 30], [], []]
aten::to 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[1, 13, 23, 30], [], [], [], [], [], [], []]
aten::_log_softmax 0.06% 81.000us 0.06% 81.000us 81.000us 1 [[1, 13, 23, 30], [], []]
aten::nll_loss_nd 0.00% 3.000us 0.03% 42.000us 42.000us 1 [[1, 13, 23, 30], [1, 23, 30], [], [], []]
aten::nll_loss2d 0.01% 7.000us 0.03% 39.000us 39.000us 1 [[1, 13, 23, 30], [1, 23, 30], [], [], []]
aten::nll_loss2d_forward 0.02% 30.000us 0.02% 32.000us 32.000us 1 [[1, 13, 23, 30], [1, 23, 30], [], [], []]
aten::ones_like 0.01% 14.000us 0.02% 23.000us 23.000us 1 [[], [], [], [], [], []]
aten::empty_like 0.00% 5.000us 0.01% 8.000us 8.000us 1 [[], [], [], [], [], []]
aten::empty_strided 0.01% 10.000us 0.01% 10.000us 5.000us 2 [[], [], [], [], [], []]
aten::fill_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[], []]
autograd::engine::evaluate_function: NllLoss2DBackwa... 0.01% 10.000us 0.10% 124.000us 124.000us 1 []
NllLoss2DBackward0 0.02% 32.000us 0.09% 114.000us 114.000us 1 [[]]
aten::nll_loss2d_backward 0.03% 38.000us 0.06% 82.000us 82.000us 1 [[], [1, 13, 23, 30], [1, 23, 30], [], [], [], []]
aten::zeros_like 0.01% 7.000us 0.03% 39.000us 39.000us 1 [[1, 13, 23, 30], [], [], [], [], []]
aten::empty_like 0.00% 5.000us 0.01% 12.000us 12.000us 1 [[1, 13, 23, 30], [], [], [], [], []]
aten::zero_ 0.02% 21.000us 0.02% 21.000us 10.500us 2 [[1, 13, 23, 30]]
aten::resize_as_ 0.00% 4.000us 0.00% 4.000us 4.000us 1 [[1, 13, 23, 30], [1, 13, 23, 30], []]
aten::resize_ 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[1, 13, 23, 30], [], []]
autograd::engine::evaluate_function: LogSoftmaxBackw... 0.00% 4.000us 0.07% 85.000us 85.000us 1 []
LogSoftmaxBackward0 0.02% 26.000us 0.06% 81.000us 81.000us 1 [[1, 13, 23, 30]]
aten::_log_softmax_backward_data 0.04% 55.000us 0.04% 55.000us 55.000us 1 [[1, 13, 23, 30], [1, 13, 23, 30], [], []]
autograd::engine::evaluate_function: ConvolutionBack... 0.01% 15.000us 0.27% 344.000us 172.000us 2 []
ConvolutionBackward0 0.01% 15.000us 0.18% 228.000us 228.000us 1 [[1, 13, 23, 30]]
aten::convolution_backward 0.04% 47.000us 0.16% 213.000us 213.000us 1 [[1, 13, 23, 30], [1, 2, 23, 30], [13, 2, 1, 1], [], [], [], [], [], [], [], []]
aten::_slow_conv2d_backward 0.08% 100.000us 0.13% 166.000us 166.000us 1 [[1, 13, 23, 30], [1, 2, 23, 30], [13, 2, 1, 1], [], [], [], []]
aten::resize_as_ 0.00% 3.000us 0.00% 4.000us 4.000us 1 [[0], [1, 2, 23, 30], []]
aten::zero_ 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[1, 2, 23, 30]]
aten::sum 0.04% 46.000us 0.04% 55.000us 55.000us 1 [[1, 13, 23, 30], [], [], [], [0]]
aten::as_strided 0.00% 2.000us 0.00% 2.000us 2.000us 1 [[13], [], [], []]
aten::fill_ 0.01% 7.000us 0.01% 7.000us 7.000us 1 [[1, 13, 1, 1], []]
aten::zero_ 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[13, 2, 1, 1]]
autograd::engine::evaluate_function: torch::autograd... 0.01% 19.000us 0.04% 46.000us 11.500us 4 []
torch::autograd::AccumulateGrad 0.00% 4.000us 0.01% 10.000us 10.000us 1 [[13, 2, 1, 1]]
aten::detach 0.00% 3.000us 0.00% 6.000us 6.000us 1 [[13, 2, 1, 1]]
detach 0.00% 3.000us 0.00% 3.000us 3.000us 1 [[13, 2, 1, 1]]
torch::autograd::AccumulateGrad 0.00% 2.000us 0.00% 5.000us 5.000us 1 [[13]]
aten::detach 0.00% 2.000us 0.00% 3.000us 3.000us 1 [[13]]
detach 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[13]]
ConvolutionBackward0 0.00% 6.000us 0.08% 101.000us 101.000us 1 [[1, 2, 23, 30]]
aten::convolution_backward 0.01% 11.000us 0.07% 95.000us 95.000us 1 [[1, 2, 23, 30], [1, 3, 23, 30], [2, 3, 3, 3], [], [], [], [], [], [], [], []]
aten::_slow_conv2d_backward 0.04% 56.000us 0.06% 84.000us 84.000us 1 [[1, 2, 23, 30], [1, 3, 23, 30], [2, 3, 3, 3], [], [], [], []]
aten::sum 0.01% 11.000us 0.01% 13.000us 13.000us 1 [[1, 2, 23, 30], [], [], [], [0]]
aten::as_strided 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2], [], [], []]
aten::fill_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[1, 2, 1, 1], []]
aten::zero_ 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[2, 3, 3, 3]]
torch::autograd::AccumulateGrad 0.00% 3.000us 0.01% 7.000us 7.000us 1 [[2, 3, 3, 3]]
aten::detach 0.00% 2.000us 0.00% 4.000us 4.000us 1 [[2, 3, 3, 3]]
detach 0.00% 2.000us 0.00% 2.000us 2.000us 1 [[2, 3, 3, 3]]
torch::autograd::AccumulateGrad 0.00% 2.000us 0.00% 5.000us 5.000us 1 [[2]]
aten::detach 0.00% 2.000us 0.00% 3.000us 3.000us 1 [[2]]
detach 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2]]
cudaGetDeviceCount 0.00% 0.000us 0.00% 0.000us 0.000us 1 []
cudaGetDeviceProperties 0.20% 256.000us 0.20% 256.000us 256.000us 1 []
cudaDeviceSynchronize 85.94% 111.079ms 85.94% 111.079ms 111.079ms 1 []
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ --------------------------------------------------------------------------------
Self CPU time total: 129.249ms
UNet(
(down_path): ModuleList(
(0): UNetConvBlock(
(block): Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
(last): Conv2d(2, 13, kernel_size=(1, 1), stride=(1, 1))
)
STAGE:2022-12-30 16:54:22 172566:172566 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
torch.Size([4, 13, 23, 30]) torch.Size([4, 23, 30])
Before HANG 3.179154872894287
AFTER HANG
STAGE:2022-12-30 16:54:23 172566:172566 ActivityProfilerController.cpp:300] Completed Stage: Collection
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ --------------------------------------------------------------------------------
Name Self CPU % Self CPU CPU total % CPU total CPU time avg # of Calls Input Shapes
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ --------------------------------------------------------------------------------
aten::empty 0.03% 58.000us 0.03% 58.000us 2.762us 21 [[], [], [], [], [], []]
aten::random_ 0.01% 21.000us 0.01% 21.000us 21.000us 1 [[], []]
aten::item 0.01% 10.000us 0.01% 15.000us 7.500us 2 [[]]
aten::_local_scalar_dense 0.00% 5.000us 0.00% 5.000us 2.500us 2 [[]]
aten::zeros 0.03% 48.000us 0.03% 61.000us 20.333us 3 [[], [], [], [], []]
aten::zero_ 0.00% 3.000us 0.00% 3.000us 1.000us 3 [[1]]
enumerate(DataLoader)#_MultiProcessingDataLoaderIter... 9.26% 16.620ms 9.28% 16.653ms 8.326ms 2 []
aten::to 0.00% 1.000us 0.00% 1.000us 0.500us 2 [[0], [], [], [], [], []]
aten::lift_fresh 0.00% 2.000us 0.00% 2.000us 1.000us 2 [[0]]
aten::detach_ 0.00% 4.000us 0.00% 5.000us 2.500us 2 [[0]]
detach_ 0.00% 1.000us 0.00% 1.000us 0.500us 2 [[0]]
aten::set_ 0.01% 13.000us 0.01% 13.000us 6.500us 2 [[0], [], [], [], []]
aten::to 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[4, 3, 23, 30], [], [], [], [], [], [], []]
aten::to 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[4, 23, 30], [], [], [], [], [], [], []]
Optimizer.zero_grad#SGD.zero_grad 0.01% 12.000us 0.01% 12.000us 12.000us 1 []
aten::conv2d 0.01% 12.000us 1.15% 2.057ms 2.057ms 1 [[4, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], []]
aten::convolution 0.02% 35.000us 1.14% 2.045ms 2.045ms 1 [[4, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], [], [], []]
aten::_convolution 0.01% 16.000us 1.12% 2.010ms 2.010ms 1 [[4, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], [], [], [], [], [], [], []]
aten::mkldnn_convolution 1.10% 1.978ms 1.11% 1.994ms 1.994ms 1 [[4, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], []]
aten::as_strided_ 0.01% 11.000us 0.01% 11.000us 5.500us 2 [[4, 2, 23, 30], [], [], []]
aten::conv2d 0.00% 3.000us 0.27% 476.000us 476.000us 1 [[4, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], []]
aten::convolution 0.01% 14.000us 0.26% 473.000us 473.000us 1 [[4, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], [], [], []]
aten::_convolution 0.00% 7.000us 0.26% 459.000us 459.000us 1 [[4, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], [], [], [], [], [], [], []]
aten::mkldnn_convolution 0.25% 448.000us 0.25% 452.000us 452.000us 1 [[4, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], []]
aten::as_strided_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[4, 13, 23, 30], [], [], []]
aten::cross_entropy_loss 0.02% 28.000us 0.18% 327.000us 327.000us 1 [[4, 13, 23, 30], [4, 23, 30], [], [], [], []]
aten::log_softmax 0.01% 12.000us 0.13% 231.000us 231.000us 1 [[4, 13, 23, 30], [], []]
aten::to 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[4, 13, 23, 30], [], [], [], [], [], [], []]
aten::_log_softmax 0.12% 219.000us 0.12% 219.000us 219.000us 1 [[4, 13, 23, 30], [], []]
aten::nll_loss_nd 0.00% 3.000us 0.04% 68.000us 68.000us 1 [[4, 13, 23, 30], [4, 23, 30], [], [], []]
aten::nll_loss2d 0.00% 3.000us 0.04% 65.000us 65.000us 1 [[4, 13, 23, 30], [4, 23, 30], [], [], []]
aten::nll_loss2d_forward 0.03% 60.000us 0.03% 62.000us 62.000us 1 [[4, 13, 23, 30], [4, 23, 30], [], [], []]
aten::resize_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[0], [], []]
aten::ones_like 0.01% 24.000us 0.02% 32.000us 32.000us 1 [[], [], [], [], [], []]
aten::empty_like 0.00% 4.000us 0.00% 8.000us 8.000us 1 [[], [], [], [], [], []]
aten::empty_strided 0.01% 15.000us 0.01% 15.000us 7.500us 2 [[], [], [], [], [], []]
aten::fill_ 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[], []]
autograd::engine::evaluate_function: NllLoss2DBackwa... 0.01% 16.000us 0.11% 198.000us 198.000us 1 []
NllLoss2DBackward0 0.02% 34.000us 0.10% 182.000us 182.000us 1 [[]]
aten::nll_loss2d_backward 0.02% 30.000us 0.08% 148.000us 148.000us 1 [[], [4, 13, 23, 30], [4, 23, 30], [], [], [], []]
aten::zeros_like 0.00% 5.000us 0.06% 99.000us 99.000us 1 [[4, 13, 23, 30], [], [], [], [], []]
aten::empty_like 0.00% 6.000us 0.01% 17.000us 17.000us 1 [[4, 13, 23, 30], [], [], [], [], []]
aten::zero_ 0.00% 5.000us 0.05% 89.000us 44.500us 2 [[4, 13, 23, 30]]
aten::fill_ 0.05% 84.000us 0.05% 84.000us 42.000us 2 [[4, 13, 23, 30], []]
aten::resize_as_ 0.00% 7.000us 0.00% 7.000us 7.000us 1 [[4, 13, 23, 30], [4, 13, 23, 30], []]
aten::resize_ 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[4, 13, 23, 30], [], []]
autograd::engine::evaluate_function: LogSoftmaxBackw... 0.00% 5.000us 0.08% 135.000us 135.000us 1 []
LogSoftmaxBackward0 0.01% 22.000us 0.07% 130.000us 130.000us 1 [[4, 13, 23, 30]]
aten::_log_softmax_backward_data 0.06% 108.000us 0.06% 108.000us 108.000us 1 [[4, 13, 23, 30], [4, 13, 23, 30], [], []]
autograd::engine::evaluate_function: ConvolutionBack... 0.01% 21.000us 1.10% 1.970ms 985.000us 2 []
ConvolutionBackward0 0.02% 28.000us 0.69% 1.241ms 1.241ms 1 [[4, 13, 23, 30]]
aten::convolution_backward 0.67% 1.198ms 0.68% 1.213ms 1.213ms 1 [[4, 13, 23, 30], [4, 2, 23, 30], [13, 2, 1, 1], [], [], [], [], [], [], [], []]
aten::as_strided_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[13], [], [], []]
aten::as_strided_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[13, 2, 1, 1], [], [], []]
autograd::engine::evaluate_function: torch::autograd... 0.00% 7.000us 0.02% 33.000us 8.250us 4 []
torch::autograd::AccumulateGrad 0.00% 5.000us 0.01% 12.000us 12.000us 1 [[13, 2, 1, 1]]
aten::detach 0.00% 2.000us 0.00% 7.000us 7.000us 1 [[13, 2, 1, 1]]
detach 0.00% 5.000us 0.00% 5.000us 5.000us 1 [[13, 2, 1, 1]]
torch::autograd::AccumulateGrad 0.00% 1.000us 0.00% 3.000us 3.000us 1 [[13]]
aten::detach 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[13]]
detach 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[13]]
ConvolutionBackward0 0.00% 6.000us 0.39% 708.000us 708.000us 1 [[4, 2, 23, 30]]
aten::convolution_backward 0.39% 695.000us 0.39% 702.000us 702.000us 1 [[4, 2, 23, 30], [4, 3, 23, 30], [2, 3, 3, 3], [], [], [], [], [], [], [], []]
aten::as_strided_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2], [], [], []]
aten::as_strided_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2, 3, 3, 3], [], [], []]
torch::autograd::AccumulateGrad 0.00% 4.000us 0.00% 8.000us 8.000us 1 [[2, 3, 3, 3]]
aten::detach 0.00% 1.000us 0.00% 4.000us 4.000us 1 [[2, 3, 3, 3]]
detach 0.00% 3.000us 0.00% 3.000us 3.000us 1 [[2, 3, 3, 3]]
torch::autograd::AccumulateGrad 0.00% 1.000us 0.00% 3.000us 3.000us 1 [[2]]
aten::detach 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2]]
detach 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2]]
cudaGetDeviceCount 0.00% 2.000us 0.00% 2.000us 2.000us 1 []
cudaGetDeviceProperties 0.15% 262.000us 0.15% 262.000us 262.000us 1 []
cudaDeviceSynchronize 87.59% 157.179ms 87.59% 157.179ms 157.179ms 1 []
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ --------------------------------------------------------------------------------
Self CPU time total: 179.442ms
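Comparing the two tables: with batch size 1 the forward convolutions dispatch to aten::thnn_conv2d / aten::_slow_conv2d_forward, while with batch size 4 they go through aten::mkldnn_convolution, i.e. the oneDNN path traced in the verbose logs above. As a purely diagnostic sketch (not part of the original reproducer), the oneDNN convolution path can be turned off with the standard torch.backends.mkldnn flags to check whether the backward pass still stalls:

import torch
from torch import nn

def step_without_mkldnn(model, optimizer, inputs, labels):
    # Run one training step with the MKLDNN/oneDNN backend disabled so PyTorch
    # falls back to the reference (thnn) convolution kernels.
    with torch.backends.mkldnn.flags(enabled=False):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        loss.backward()  # if this completes, the hang is specific to the oneDNN primitives
    return loss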
#0 0x00007fca0afed99f in __GI___poll (fds=0x7fc9ebbd0040, nfds=1, timeout=5000) at ../sysdeps/unix/sysv/linux/poll.c:29
#1 0x0000000000636b90 in ?? ()
#2 0x00000000005048b3 in ?? ()
#3 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#4 0x000000000056939a in _PyEval_EvalCodeWithName ()
#5 0x00000000005f6a13 in _PyFunction_Vectorcall ()
#6 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#7 0x000000000056939a in _PyEval_EvalCodeWithName ()
#8 0x00000000005f6a13 in _PyFunction_Vectorcall ()
#9 0x000000000056b0ae in _PyEval_EvalFrameDefault ()
#10 0x00000000005f6836 in _PyFunction_Vectorcall ()
#11 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#12 0x000000000056939a in _PyEval_EvalCodeWithName ()
#13 0x000000000050aaa0 in ?? ()
#14 0x0000000000570035 in _PyEval_EvalFrameDefault ()
#15 0x000000000056939a in _PyEval_EvalCodeWithName ()
#16 0x000000000050aaa0 in ?? ()
#17 0x000000000056c28c in _PyEval_EvalFrameDefault ()
#18 0x00000000005f6836 in _PyFunction_Vectorcall ()
#19 0x00000000005f3547 in PyObject_Call ()
#20 0x000000000056c8cd in _PyEval_EvalFrameDefault ()
#21 0x00000000005f6836 in _PyFunction_Vectorcall ()
#22 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#23 0x000000000056939a in _PyEval_EvalCodeWithName ()
#24 0x000000000050aaa0 in ?? ()
#25 0x000000000056c28c in _PyEval_EvalFrameDefault ()
#26 0x00000000005f6836 in _PyFunction_Vectorcall ()
#27 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#28 0x000000000059bdfb in ?? ()
#29 0x00000000005f3d7f in _PyObject_MakeTpCall ()
#30 0x0000000000570266 in _PyEval_EvalFrameDefault ()
#31 0x00000000005f6836 in _PyFunction_Vectorcall ()
#32 0x0000000000570035 in _PyEval_EvalFrameDefault ()
#33 0x00000000005f6836 in _PyFunction_Vectorcall ()
#34 0x0000000000570035 in _PyEval_EvalFrameDefault ()
#35 0x00000000005f6836 in _PyFunction_Vectorcall ()
#36 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#37 0x000000000056939a in _PyEval_EvalCodeWithName ()
#38 0x000000000059bf26 in ?? ()
#39 0x00000000005f3d7f in _PyObject_MakeTpCall ()
#40 0x0000000000570266 in _PyEval_EvalFrameDefault ()
#41 0x00000000005f6836 in _PyFunction_Vectorcall ()
#42 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#43 0x00000000005f6836 in _PyFunction_Vectorcall ()
#44 0x00000000005a7b41 in ?? ()
#45 0x000000000069e876 in ?? ()
#46 0x00000000005121b8 in PyObject_GetIter ()
#47 0x0000000000614891 in ?? ()
#48 0x00000000005f3d03 in _PyObject_MakeTpCall ()
#49 0x0000000000570266 in _PyEval_EvalFrameDefault ()
#50 0x00000000005f6836 in _PyFunction_Vectorcall ()
#51 0x000000000056b0ae in _PyEval_EvalFrameDefault ()
#52 0x000000000056939a in _PyEval_EvalCodeWithName ()
#53 0x000000000068d047 in PyEval_EvalCode ()
#54 0x000000000067e351 in ?? ()
#55 0x000000000067e3cf in ?? ()
#56 0x000000000067e471 in ?? ()
#57 0x000000000067e817 in PyRun_SimpleFileExFlags ()
#58 0x00000000006b6fe2 in Py_RunMain ()
#59 0x00000000006b736d in Py_BytesMain ()
#60 0x00007fca0aeff083 in __libc_start_main (main=0x4eead0 <main>, argc=13, argv=0x7ffecda1a858, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7ffecda1a848) at ../csu/libc-start.c:308
#61 0x00000000005fa5ce in _start ()