@ljaljushkin
Last active January 12, 2023 14:20
The full reproducer, GDB stack trace, and profiler logs for different batch sizes.
import torch
from torch import nn
from torch.optim import SGD
from torch.utils import data
from torch.utils.data import DataLoader


class UNet(nn.Module):
    def __init__(
        self,
        in_channels=3,
        n_classes=2,
        depth=1,
        wf=1,
        padding=True
    ):
        super().__init__()
        self.padding = padding
        self.depth = depth
        prev_channels = in_channels
        self.down_path = nn.ModuleList()
        for i in range(depth):
            self.down_path.append(
                UNetConvBlock(prev_channels, 2 ** (wf + i), padding)
            )
            prev_channels = 2 ** (wf + i)
        self.last = nn.Conv2d(prev_channels, n_classes, kernel_size=1)

    def forward(self, x):
        for i, down in enumerate(self.down_path):
            x = down(x)
        x = self.last(x)
        return x


class UNetConvBlock(nn.Module):
    def __init__(self, in_size, out_size, padding):
        super().__init__()
        self.block = nn.Conv2d(in_size, out_size, kernel_size=3, padding=int(padding))

    def forward(self, x):
        out = self.block(x)
        return out


class MockDataset(data.Dataset):
    def __init__(self):
        super().__init__()

    def __len__(self):
        return 6

    def __getitem__(self, idx):
        image = torch.rand((3, 23, 30))
        target = torch.randint(0, 1, (23, 30))
        return image, target


def main():
    train_set = MockDataset()
    train_loader = DataLoader(train_set, batch_size=4, num_workers=1, drop_last=True)
    model = UNet(n_classes=13)
    print(model)
    device = 'cpu'
    model.to(device)
    optimizer = SGD(model.parameters(), lr=1e-3)
    for epoch in range(1):
        model.train()
        for step, batch_data in enumerate(train_loader):
            inputs = batch_data[0].to(device)
            labels = batch_data[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            print(outputs.shape, labels.shape)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            print(f"\nBefore HANG {loss}\n")
            loss.backward()
            print("\nAFTER HANG\n")


if __name__ == '__main__':
    main()
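The profiler tables further below were evidently collected with the PyTorch profiler (the ActivityProfilerController stage messages and the per-shape CPU tables come from it), but the reproducer above does not show that part. A minimal sketch of how such a table could be produced, assuming the reproducer's imports and the standard torch.profiler API (the helper name profiled_step is made up for illustration):

from torch.profiler import profile, ProfilerActivity

def profiled_step(model, optimizer, inputs, labels):
    # Profile a single training step on CPU, recording input shapes so the
    # report can be grouped the same way as the tables in this gist.
    with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        loss.backward()
    print(prof.key_averages(group_by_input_shape=True).table(sort_by="cpu_time_total"))
    return loss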
$ MKLDNN_VERBOSE=2 python3 examples/torch/semantic_segmentation/main.py
UNet(
(down_path): ModuleList(
(0): UNetConvBlock(
(block): Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
(last): Conv2d(2, 13, kernel_size=(1, 1), stride=(1, 1))
)
onednn_verbose,info,oneDNN v2.6.0 (commit 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
onednn_verbose,info,cpu,runtime:OpenMP,nthr:18
onednn_verbose,info,cpu,isa:Intel AVX-512 with Intel DL Boost
onednn_verbose,info,gpu,runtime:none
onednn_verbose,info,prim_template:operation,engine,primitive,implementation,prop_kind,memory_descriptors,attributes,auxiliary,problem_desc,exec_time
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:Acdb16a:f0,attr-scratchpad:user ,,2x3x3x3,0.197998
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:Acdb16a:f0,attr-scratchpad:user ,,2x3x3x3,0.0012207
onednn_verbose,create:cache_miss,cpu,convolution,jit:avx512_core,forward_training,src_f32::blocked:abcd:f0 wei_f32:p:blocked:Acdb16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic3oc2_ih23oh23kh3sh1dh0ph1_iw30ow30kw3sw1dw0pw1,0.148926
onednn_verbose,exec,cpu,convolution,jit:avx512_core,forward_training,src_f32::blocked:abcd:f0 wei_f32:p:blocked:Acdb16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic3oc2_ih23oh23kh3sh1dh0ph1_iw30ow30kw3sw1dw0pw1,0.615967
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.193848
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.0161133
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.0319824
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.0310059
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16b16a:f0,attr-scratchpad:user ,,13x2x1x1,0.0319824
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16b16a:f0,attr-scratchpad:user ,,13x2x1x1,0.000976562
onednn_verbose,create:cache_miss,cpu,convolution,jit_1x1:avx512_core,forward_training,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16b16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0908203
onednn_verbose,exec,cpu,convolution,jit_1x1:avx512_core,forward_training,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16b16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0681152
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x13x23x30,0.172852
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x13x23x30,0.0168457
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.0390625
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.032959
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16a16b:f0,attr-scratchpad:user ,,13x2x1x1,0.0319824
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16a16b:f0,attr-scratchpad:user ,,13x2x1x1,0.000976562
onednn_verbose,create:cache_miss,cpu,convolution,jit_1x1:avx512_core,backward_data,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16a16b:f0 bia_undef::undef::f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0720215
onednn_verbose,exec,cpu,convolution,jit_1x1:avx512_core,backward_data,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16a16b:f0 bia_undef::undef::f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0300293
onednn_verbose,create:cache_hit,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.00195312
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.0100098
onednn_verbose,create:cache_hit,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.000976562
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.0109863
onednn_verbose,create:cache_hit,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.000976562
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.0100098
onednn_verbose,create:cache_miss,cpu,convolution,jit_1x1:avx512_core,backward_weights,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16b16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.225098
$ MKLDNN_VERBOSE=2 python3 examples/torch/semantic_segmentation/main.py
UNet(
(down_path): ModuleList(
(0): UNetConvBlock(
(block): Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
(last): Conv2d(2, 13, kernel_size=(1, 1), stride=(1, 1))
)
onednn_verbose,info,oneDNN v2.7.2 (commit fbec3e25a559ee252022ae066817b204e106a6ba)
onednn_verbose,info,cpu,runtime:OpenMP,nthr:18
onednn_verbose,info,cpu,isa:Intel AVX-512 with Intel DL Boost
onednn_verbose,info,gpu,runtime:none
onednn_verbose,info,prim_template:operation,engine,primitive,implementation,prop_kind,memory_descriptors,attributes,auxiliary,problem_desc,exec_time
onednn_verbose,create:cache_miss,cpu,convolution,jit:avx512_core,forward_training,src_f32::blocked:abcd:f0 wei_f32:p:blocked:Acdb16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic3oc2_ih23oh23kh3sh1dh0ph1_iw30ow30kw3sw1dw0pw1,0.143066
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:Acdb16a:f0,attr-scratchpad:user ,,2x3x3x3,0.0349121
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:Acdb16a:f0,attr-scratchpad:user ,,2x3x3x3,0.00219727
onednn_verbose,exec,cpu,convolution,jit:avx512_core,forward_training,src_f32::blocked:abcd:f0 wei_f32:p:blocked:Acdb16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic3oc2_ih23oh23kh3sh1dh0ph1_iw30ow30kw3sw1dw0pw1,0.697998
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.141113
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.0151367
onednn_verbose,create:cache_miss,cpu,convolution,jit_1x1:avx512_core,forward_training,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16b16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0791016
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.0200195
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.0290527
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16b16a:f0,attr-scratchpad:user ,,13x2x1x1,0.0251465
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16b16a:f0,attr-scratchpad:user ,,13x2x1x1,0.000976562
onednn_verbose,exec,cpu,convolution,jit_1x1:avx512_core,forward_training,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16b16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0471191
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x13x23x30,0.138184
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x13x23x30,0.0109863
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.0300293
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.0300293
onednn_verbose,create:cache_miss,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16a16b:f0,attr-scratchpad:user ,,13x2x1x1,0.0229492
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:ABcd16a16b:f0,attr-scratchpad:user ,,13x2x1x1,0
onednn_verbose,create:cache_miss,cpu,convolution,jit_1x1:avx512_core,backward_data,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16a16b:f0 bia_undef::undef::f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.0578613
onednn_verbose,exec,cpu,convolution,jit_1x1:avx512_core,backward_data,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16a16b:f0 bia_undef::undef::f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.027832
onednn_verbose,create:cache_hit,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.00195312
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32:p:blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,attr-scratchpad:user ,,4x2x23x30,0.00878906
onednn_verbose,create:cache_hit,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.0012207
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x13x23x30,0.00805664
onednn_verbose,create:cache_hit,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.000976562
onednn_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,,4x2x23x30,0.0239258
onednn_verbose,create:cache_miss,cpu,convolution,jit_1x1:avx512_core,backward_weights,src_f32:p:blocked:aBcd16b:f0 wei_f32:p:blocked:ABcd16b16a:f0 bia_f32::blocked:a:f0 dst_f32:p:blocked:aBcd16b:f0,attr-scratchpad:user ,alg:convolution_direct,mb4_ic2oc13_ih23oh23kh1sh1dh0ph0_iw30ow30kw1sw1dw0pw0,0.156006
UNet(
(down_path): ModuleList(
(0): UNetConvBlock(
(block): Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
(last): Conv2d(2, 13, kernel_size=(1, 1), stride=(1, 1))
)
STAGE:2022-12-30 18:12:36 173859:173859 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
torch.Size([1, 13, 23, 30]) torch.Size([1, 23, 30])
Before HANG 2.654798746109009
AFTER HANG
STAGE:2022-12-30 18:12:37 173859:173859 ActivityProfilerController.cpp:300] Completed Stage: Collection
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ --------------------------------------------------------------------------------
Name Self CPU % Self CPU CPU total % CPU total CPU time avg # of Calls Input Shapes
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ --------------------------------------------------------------------------------
aten::empty 0.04% 54.000us 0.04% 54.000us 2.700us 20 [[], [], [], [], [], []]
aten::random_ 0.02% 26.000us 0.02% 26.000us 26.000us 1 [[], []]
aten::item 0.01% 11.000us 0.02% 22.000us 11.000us 2 [[]]
aten::_local_scalar_dense 0.01% 11.000us 0.01% 11.000us 5.500us 2 [[]]
aten::zeros 0.04% 55.000us 0.05% 69.000us 23.000us 3 [[], [], [], [], []]
aten::zero_ 0.00% 4.000us 0.00% 4.000us 1.333us 3 [[1]]
enumerate(DataLoader)#_MultiProcessingDataLoaderIter... 12.09% 15.623ms 12.12% 15.665ms 7.832ms 2 []
aten::to 0.00% 1.000us 0.00% 1.000us 0.500us 2 [[0], [], [], [], [], []]
aten::lift_fresh 0.00% 4.000us 0.00% 4.000us 2.000us 2 [[0]]
aten::detach_ 0.00% 4.000us 0.00% 6.000us 3.000us 2 [[0]]
detach_ 0.00% 3.000us 0.00% 3.000us 1.500us 2 [[0]]
aten::set_ 0.01% 18.000us 0.01% 18.000us 9.000us 2 [[0], [], [], [], []]
aten::to 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[1, 3, 23, 30], [], [], [], [], [], [], []]
aten::to 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[1, 23, 30], [], [], [], [], [], [], []]
Optimizer.zero_grad#SGD.zero_grad 0.01% 16.000us 0.01% 16.000us 16.000us 1 []
aten::conv2d 0.01% 17.000us 0.96% 1.242ms 1.242ms 1 [[1, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], []]
aten::convolution 0.03% 38.000us 0.95% 1.225ms 1.225ms 1 [[1, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], [], [], []]
aten::_convolution 0.02% 29.000us 0.92% 1.187ms 1.187ms 1 [[1, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], [], [], [], [], [], [], []]
aten::_nnpack_available 0.01% 18.000us 0.01% 18.000us 4.500us 4 []
aten::thnn_conv2d 0.01% 10.000us 0.88% 1.140ms 1.140ms 1 [[1, 3, 23, 30], [2, 3, 3, 3], [], [2], [], []]
aten::_slow_conv2d_forward 0.83% 1.073ms 0.87% 1.130ms 1.130ms 1 [[1, 3, 23, 30], [2, 3, 3, 3], [], [2], [], []]
aten::view 0.00% 6.000us 0.00% 6.000us 3.000us 2 [[2, 3, 3, 3], []]
aten::resize_ 0.01% 11.000us 0.01% 11.000us 1.833us 6 [[0], [], []]
aten::reshape 0.01% 18.000us 0.01% 19.000us 19.000us 1 [[2], []]
aten::_reshape_alias 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2], [], []]
aten::copy_ 0.02% 23.000us 0.02% 23.000us 23.000us 1 [[1, 2, 23, 30], [2, 1, 1], []]
aten::conv2d 0.00% 3.000us 0.05% 65.000us 65.000us 1 [[1, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], []]
aten::convolution 0.01% 11.000us 0.05% 62.000us 62.000us 1 [[1, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], [], [], []]
aten::_convolution 0.01% 8.000us 0.04% 51.000us 51.000us 1 [[1, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], [], [], [], [], [], [], []]
aten::thnn_conv2d 0.00% 2.000us 0.03% 43.000us 43.000us 1 [[1, 2, 23, 30], [13, 2, 1, 1], [], [13], [], []]
aten::_slow_conv2d_forward 0.02% 24.000us 0.03% 41.000us 41.000us 1 [[1, 2, 23, 30], [13, 2, 1, 1], [], [13], [], []]
aten::view 0.00% 4.000us 0.00% 4.000us 1.333us 3 [[13, 2, 1, 1], []]
aten::view 0.00% 1.000us 0.00% 1.000us 0.500us 2 [[1, 2, 23, 30], []]
aten::detach 0.00% 3.000us 0.00% 3.000us 1.500us 2 [[1, 2, 690]]
aten::reshape 0.00% 4.000us 0.00% 4.000us 4.000us 1 [[13], []]
aten::_reshape_alias 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[13], [], []]
aten::copy_ 0.01% 7.000us 0.01% 7.000us 7.000us 1 [[1, 13, 23, 30], [13, 1, 1], []]
aten::cross_entropy_loss 0.02% 32.000us 0.14% 175.000us 175.000us 1 [[1, 13, 23, 30], [1, 23, 30], [], [], [], []]
aten::log_softmax 0.02% 20.000us 0.08% 101.000us 101.000us 1 [[1, 13, 23, 30], [], []]
aten::to 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[1, 13, 23, 30], [], [], [], [], [], [], []]
aten::_log_softmax 0.06% 81.000us 0.06% 81.000us 81.000us 1 [[1, 13, 23, 30], [], []]
aten::nll_loss_nd 0.00% 3.000us 0.03% 42.000us 42.000us 1 [[1, 13, 23, 30], [1, 23, 30], [], [], []]
aten::nll_loss2d 0.01% 7.000us 0.03% 39.000us 39.000us 1 [[1, 13, 23, 30], [1, 23, 30], [], [], []]
aten::nll_loss2d_forward 0.02% 30.000us 0.02% 32.000us 32.000us 1 [[1, 13, 23, 30], [1, 23, 30], [], [], []]
aten::ones_like 0.01% 14.000us 0.02% 23.000us 23.000us 1 [[], [], [], [], [], []]
aten::empty_like 0.00% 5.000us 0.01% 8.000us 8.000us 1 [[], [], [], [], [], []]
aten::empty_strided 0.01% 10.000us 0.01% 10.000us 5.000us 2 [[], [], [], [], [], []]
aten::fill_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[], []]
autograd::engine::evaluate_function: NllLoss2DBackwa... 0.01% 10.000us 0.10% 124.000us 124.000us 1 []
NllLoss2DBackward0 0.02% 32.000us 0.09% 114.000us 114.000us 1 [[]]
aten::nll_loss2d_backward 0.03% 38.000us 0.06% 82.000us 82.000us 1 [[], [1, 13, 23, 30], [1, 23, 30], [], [], [], []]
aten::zeros_like 0.01% 7.000us 0.03% 39.000us 39.000us 1 [[1, 13, 23, 30], [], [], [], [], []]
aten::empty_like 0.00% 5.000us 0.01% 12.000us 12.000us 1 [[1, 13, 23, 30], [], [], [], [], []]
aten::zero_ 0.02% 21.000us 0.02% 21.000us 10.500us 2 [[1, 13, 23, 30]]
aten::resize_as_ 0.00% 4.000us 0.00% 4.000us 4.000us 1 [[1, 13, 23, 30], [1, 13, 23, 30], []]
aten::resize_ 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[1, 13, 23, 30], [], []]
autograd::engine::evaluate_function: LogSoftmaxBackw... 0.00% 4.000us 0.07% 85.000us 85.000us 1 []
LogSoftmaxBackward0 0.02% 26.000us 0.06% 81.000us 81.000us 1 [[1, 13, 23, 30]]
aten::_log_softmax_backward_data 0.04% 55.000us 0.04% 55.000us 55.000us 1 [[1, 13, 23, 30], [1, 13, 23, 30], [], []]
autograd::engine::evaluate_function: ConvolutionBack... 0.01% 15.000us 0.27% 344.000us 172.000us 2 []
ConvolutionBackward0 0.01% 15.000us 0.18% 228.000us 228.000us 1 [[1, 13, 23, 30]]
aten::convolution_backward 0.04% 47.000us 0.16% 213.000us 213.000us 1 [[1, 13, 23, 30], [1, 2, 23, 30], [13, 2, 1, 1], [], [], [], [], [], [], [], []]
aten::_slow_conv2d_backward 0.08% 100.000us 0.13% 166.000us 166.000us 1 [[1, 13, 23, 30], [1, 2, 23, 30], [13, 2, 1, 1], [], [], [], []]
aten::resize_as_ 0.00% 3.000us 0.00% 4.000us 4.000us 1 [[0], [1, 2, 23, 30], []]
aten::zero_ 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[1, 2, 23, 30]]
aten::sum 0.04% 46.000us 0.04% 55.000us 55.000us 1 [[1, 13, 23, 30], [], [], [], [0]]
aten::as_strided 0.00% 2.000us 0.00% 2.000us 2.000us 1 [[13], [], [], []]
aten::fill_ 0.01% 7.000us 0.01% 7.000us 7.000us 1 [[1, 13, 1, 1], []]
aten::zero_ 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[13, 2, 1, 1]]
autograd::engine::evaluate_function: torch::autograd... 0.01% 19.000us 0.04% 46.000us 11.500us 4 []
torch::autograd::AccumulateGrad 0.00% 4.000us 0.01% 10.000us 10.000us 1 [[13, 2, 1, 1]]
aten::detach 0.00% 3.000us 0.00% 6.000us 6.000us 1 [[13, 2, 1, 1]]
detach 0.00% 3.000us 0.00% 3.000us 3.000us 1 [[13, 2, 1, 1]]
torch::autograd::AccumulateGrad 0.00% 2.000us 0.00% 5.000us 5.000us 1 [[13]]
aten::detach 0.00% 2.000us 0.00% 3.000us 3.000us 1 [[13]]
detach 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[13]]
ConvolutionBackward0 0.00% 6.000us 0.08% 101.000us 101.000us 1 [[1, 2, 23, 30]]
aten::convolution_backward 0.01% 11.000us 0.07% 95.000us 95.000us 1 [[1, 2, 23, 30], [1, 3, 23, 30], [2, 3, 3, 3], [], [], [], [], [], [], [], []]
aten::_slow_conv2d_backward 0.04% 56.000us 0.06% 84.000us 84.000us 1 [[1, 2, 23, 30], [1, 3, 23, 30], [2, 3, 3, 3], [], [], [], []]
aten::sum 0.01% 11.000us 0.01% 13.000us 13.000us 1 [[1, 2, 23, 30], [], [], [], [0]]
aten::as_strided 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2], [], [], []]
aten::fill_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[1, 2, 1, 1], []]
aten::zero_ 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[2, 3, 3, 3]]
torch::autograd::AccumulateGrad 0.00% 3.000us 0.01% 7.000us 7.000us 1 [[2, 3, 3, 3]]
aten::detach 0.00% 2.000us 0.00% 4.000us 4.000us 1 [[2, 3, 3, 3]]
detach 0.00% 2.000us 0.00% 2.000us 2.000us 1 [[2, 3, 3, 3]]
torch::autograd::AccumulateGrad 0.00% 2.000us 0.00% 5.000us 5.000us 1 [[2]]
aten::detach 0.00% 2.000us 0.00% 3.000us 3.000us 1 [[2]]
detach 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2]]
cudaGetDeviceCount 0.00% 0.000us 0.00% 0.000us 0.000us 1 []
cudaGetDeviceProperties 0.20% 256.000us 0.20% 256.000us 256.000us 1 []
cudaDeviceSynchronize 85.94% 111.079ms 85.94% 111.079ms 111.079ms 1 []
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ --------------------------------------------------------------------------------
Self CPU time total: 129.249ms
UNet(
(down_path): ModuleList(
(0): UNetConvBlock(
(block): Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
(last): Conv2d(2, 13, kernel_size=(1, 1), stride=(1, 1))
)
STAGE:2022-12-30 16:54:22 172566:172566 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
torch.Size([4, 13, 23, 30]) torch.Size([4, 23, 30])
Before HANG 3.179154872894287
AFTER HANG
STAGE:2022-12-30 16:54:23 172566:172566 ActivityProfilerController.cpp:300] Completed Stage: Collection
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ --------------------------------------------------------------------------------
Name Self CPU % Self CPU CPU total % CPU total CPU time avg # of Calls Input Shapes
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ --------------------------------------------------------------------------------
aten::empty 0.03% 58.000us 0.03% 58.000us 2.762us 21 [[], [], [], [], [], []]
aten::random_ 0.01% 21.000us 0.01% 21.000us 21.000us 1 [[], []]
aten::item 0.01% 10.000us 0.01% 15.000us 7.500us 2 [[]]
aten::_local_scalar_dense 0.00% 5.000us 0.00% 5.000us 2.500us 2 [[]]
aten::zeros 0.03% 48.000us 0.03% 61.000us 20.333us 3 [[], [], [], [], []]
aten::zero_ 0.00% 3.000us 0.00% 3.000us 1.000us 3 [[1]]
enumerate(DataLoader)#_MultiProcessingDataLoaderIter... 9.26% 16.620ms 9.28% 16.653ms 8.326ms 2 []
aten::to 0.00% 1.000us 0.00% 1.000us 0.500us 2 [[0], [], [], [], [], []]
aten::lift_fresh 0.00% 2.000us 0.00% 2.000us 1.000us 2 [[0]]
aten::detach_ 0.00% 4.000us 0.00% 5.000us 2.500us 2 [[0]]
detach_ 0.00% 1.000us 0.00% 1.000us 0.500us 2 [[0]]
aten::set_ 0.01% 13.000us 0.01% 13.000us 6.500us 2 [[0], [], [], [], []]
aten::to 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[4, 3, 23, 30], [], [], [], [], [], [], []]
aten::to 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[4, 23, 30], [], [], [], [], [], [], []]
Optimizer.zero_grad#SGD.zero_grad 0.01% 12.000us 0.01% 12.000us 12.000us 1 []
aten::conv2d 0.01% 12.000us 1.15% 2.057ms 2.057ms 1 [[4, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], []]
aten::convolution 0.02% 35.000us 1.14% 2.045ms 2.045ms 1 [[4, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], [], [], []]
aten::_convolution 0.01% 16.000us 1.12% 2.010ms 2.010ms 1 [[4, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], [], [], [], [], [], [], []]
aten::mkldnn_convolution 1.10% 1.978ms 1.11% 1.994ms 1.994ms 1 [[4, 3, 23, 30], [2, 3, 3, 3], [2], [], [], [], []]
aten::as_strided_ 0.01% 11.000us 0.01% 11.000us 5.500us 2 [[4, 2, 23, 30], [], [], []]
aten::conv2d 0.00% 3.000us 0.27% 476.000us 476.000us 1 [[4, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], []]
aten::convolution 0.01% 14.000us 0.26% 473.000us 473.000us 1 [[4, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], [], [], []]
aten::_convolution 0.00% 7.000us 0.26% 459.000us 459.000us 1 [[4, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], [], [], [], [], [], [], []]
aten::mkldnn_convolution 0.25% 448.000us 0.25% 452.000us 452.000us 1 [[4, 2, 23, 30], [13, 2, 1, 1], [13], [], [], [], []]
aten::as_strided_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[4, 13, 23, 30], [], [], []]
aten::cross_entropy_loss 0.02% 28.000us 0.18% 327.000us 327.000us 1 [[4, 13, 23, 30], [4, 23, 30], [], [], [], []]
aten::log_softmax 0.01% 12.000us 0.13% 231.000us 231.000us 1 [[4, 13, 23, 30], [], []]
aten::to 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[4, 13, 23, 30], [], [], [], [], [], [], []]
aten::_log_softmax 0.12% 219.000us 0.12% 219.000us 219.000us 1 [[4, 13, 23, 30], [], []]
aten::nll_loss_nd 0.00% 3.000us 0.04% 68.000us 68.000us 1 [[4, 13, 23, 30], [4, 23, 30], [], [], []]
aten::nll_loss2d 0.00% 3.000us 0.04% 65.000us 65.000us 1 [[4, 13, 23, 30], [4, 23, 30], [], [], []]
aten::nll_loss2d_forward 0.03% 60.000us 0.03% 62.000us 62.000us 1 [[4, 13, 23, 30], [4, 23, 30], [], [], []]
aten::resize_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[0], [], []]
aten::ones_like 0.01% 24.000us 0.02% 32.000us 32.000us 1 [[], [], [], [], [], []]
aten::empty_like 0.00% 4.000us 0.00% 8.000us 8.000us 1 [[], [], [], [], [], []]
aten::empty_strided 0.01% 15.000us 0.01% 15.000us 7.500us 2 [[], [], [], [], [], []]
aten::fill_ 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[], []]
autograd::engine::evaluate_function: NllLoss2DBackwa... 0.01% 16.000us 0.11% 198.000us 198.000us 1 []
NllLoss2DBackward0 0.02% 34.000us 0.10% 182.000us 182.000us 1 [[]]
aten::nll_loss2d_backward 0.02% 30.000us 0.08% 148.000us 148.000us 1 [[], [4, 13, 23, 30], [4, 23, 30], [], [], [], []]
aten::zeros_like 0.00% 5.000us 0.06% 99.000us 99.000us 1 [[4, 13, 23, 30], [], [], [], [], []]
aten::empty_like 0.00% 6.000us 0.01% 17.000us 17.000us 1 [[4, 13, 23, 30], [], [], [], [], []]
aten::zero_ 0.00% 5.000us 0.05% 89.000us 44.500us 2 [[4, 13, 23, 30]]
aten::fill_ 0.05% 84.000us 0.05% 84.000us 42.000us 2 [[4, 13, 23, 30], []]
aten::resize_as_ 0.00% 7.000us 0.00% 7.000us 7.000us 1 [[4, 13, 23, 30], [4, 13, 23, 30], []]
aten::resize_ 0.00% 0.000us 0.00% 0.000us 0.000us 1 [[4, 13, 23, 30], [], []]
autograd::engine::evaluate_function: LogSoftmaxBackw... 0.00% 5.000us 0.08% 135.000us 135.000us 1 []
LogSoftmaxBackward0 0.01% 22.000us 0.07% 130.000us 130.000us 1 [[4, 13, 23, 30]]
aten::_log_softmax_backward_data 0.06% 108.000us 0.06% 108.000us 108.000us 1 [[4, 13, 23, 30], [4, 13, 23, 30], [], []]
autograd::engine::evaluate_function: ConvolutionBack... 0.01% 21.000us 1.10% 1.970ms 985.000us 2 []
ConvolutionBackward0 0.02% 28.000us 0.69% 1.241ms 1.241ms 1 [[4, 13, 23, 30]]
aten::convolution_backward 0.67% 1.198ms 0.68% 1.213ms 1.213ms 1 [[4, 13, 23, 30], [4, 2, 23, 30], [13, 2, 1, 1], [], [], [], [], [], [], [], []]
aten::as_strided_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[13], [], [], []]
aten::as_strided_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[13, 2, 1, 1], [], [], []]
autograd::engine::evaluate_function: torch::autograd... 0.00% 7.000us 0.02% 33.000us 8.250us 4 []
torch::autograd::AccumulateGrad 0.00% 5.000us 0.01% 12.000us 12.000us 1 [[13, 2, 1, 1]]
aten::detach 0.00% 2.000us 0.00% 7.000us 7.000us 1 [[13, 2, 1, 1]]
detach 0.00% 5.000us 0.00% 5.000us 5.000us 1 [[13, 2, 1, 1]]
torch::autograd::AccumulateGrad 0.00% 1.000us 0.00% 3.000us 3.000us 1 [[13]]
aten::detach 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[13]]
detach 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[13]]
ConvolutionBackward0 0.00% 6.000us 0.39% 708.000us 708.000us 1 [[4, 2, 23, 30]]
aten::convolution_backward 0.39% 695.000us 0.39% 702.000us 702.000us 1 [[4, 2, 23, 30], [4, 3, 23, 30], [2, 3, 3, 3], [], [], [], [], [], [], [], []]
aten::as_strided_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2], [], [], []]
aten::as_strided_ 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2, 3, 3, 3], [], [], []]
torch::autograd::AccumulateGrad 0.00% 4.000us 0.00% 8.000us 8.000us 1 [[2, 3, 3, 3]]
aten::detach 0.00% 1.000us 0.00% 4.000us 4.000us 1 [[2, 3, 3, 3]]
detach 0.00% 3.000us 0.00% 3.000us 3.000us 1 [[2, 3, 3, 3]]
torch::autograd::AccumulateGrad 0.00% 1.000us 0.00% 3.000us 3.000us 1 [[2]]
aten::detach 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2]]
detach 0.00% 1.000us 0.00% 1.000us 1.000us 1 [[2]]
cudaGetDeviceCount 0.00% 2.000us 0.00% 2.000us 2.000us 1 []
cudaGetDeviceProperties 0.15% 262.000us 0.15% 262.000us 262.000us 1 []
cudaDeviceSynchronize 87.59% 157.179ms 87.59% 157.179ms 157.179ms 1 []
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ --------------------------------------------------------------------------------
Self CPU time total: 179.442ms
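Comparing the two tables: with batch size 1 the forward convolutions dispatch to aten::thnn_conv2d / aten::_slow_conv2d_forward, while with batch size 4 they go through aten::mkldnn_convolution, i.e. the oneDNN path traced in the verbose logs above. As a purely diagnostic sketch (not part of the original reproducer), the oneDNN convolution path can be turned off with the standard torch.backends.mkldnn flags to check whether the backward pass still stalls:

import torch
from torch import nn

def step_without_mkldnn(model, optimizer, inputs, labels):
    # Run one training step with the MKLDNN/oneDNN backend disabled so PyTorch
    # falls back to the reference (thnn) convolution kernels.
    with torch.backends.mkldnn.flags(enabled=False):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        loss.backward()  # if this completes, the hang is specific to the oneDNN primitives
    return loss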
#0 0x00007fca0afed99f in __GI___poll (fds=0x7fc9ebbd0040, nfds=1, timeout=5000) at ../sysdeps/unix/sysv/linux/poll.c:29
#1 0x0000000000636b90 in ?? ()
#2 0x00000000005048b3 in ?? ()
#3 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#4 0x000000000056939a in _PyEval_EvalCodeWithName ()
#5 0x00000000005f6a13 in _PyFunction_Vectorcall ()
#6 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#7 0x000000000056939a in _PyEval_EvalCodeWithName ()
#8 0x00000000005f6a13 in _PyFunction_Vectorcall ()
#9 0x000000000056b0ae in _PyEval_EvalFrameDefault ()
#10 0x00000000005f6836 in _PyFunction_Vectorcall ()
#11 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#12 0x000000000056939a in _PyEval_EvalCodeWithName ()
#13 0x000000000050aaa0 in ?? ()
#14 0x0000000000570035 in _PyEval_EvalFrameDefault ()
#15 0x000000000056939a in _PyEval_EvalCodeWithName ()
#16 0x000000000050aaa0 in ?? ()
#17 0x000000000056c28c in _PyEval_EvalFrameDefault ()
#18 0x00000000005f6836 in _PyFunction_Vectorcall ()
#19 0x00000000005f3547 in PyObject_Call ()
#20 0x000000000056c8cd in _PyEval_EvalFrameDefault ()
#21 0x00000000005f6836 in _PyFunction_Vectorcall ()
#22 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#23 0x000000000056939a in _PyEval_EvalCodeWithName ()
#24 0x000000000050aaa0 in ?? ()
#25 0x000000000056c28c in _PyEval_EvalFrameDefault ()
#26 0x00000000005f6836 in _PyFunction_Vectorcall ()
#27 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#28 0x000000000059bdfb in ?? ()
#29 0x00000000005f3d7f in _PyObject_MakeTpCall ()
#30 0x0000000000570266 in _PyEval_EvalFrameDefault ()
#31 0x00000000005f6836 in _PyFunction_Vectorcall ()
#32 0x0000000000570035 in _PyEval_EvalFrameDefault ()
#33 0x00000000005f6836 in _PyFunction_Vectorcall ()
#34 0x0000000000570035 in _PyEval_EvalFrameDefault ()
#35 0x00000000005f6836 in _PyFunction_Vectorcall ()
#36 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#37 0x000000000056939a in _PyEval_EvalCodeWithName ()
#38 0x000000000059bf26 in ?? ()
#39 0x00000000005f3d7f in _PyObject_MakeTpCall ()
#40 0x0000000000570266 in _PyEval_EvalFrameDefault ()
#41 0x00000000005f6836 in _PyFunction_Vectorcall ()
#42 0x000000000056b1da in _PyEval_EvalFrameDefault ()
#43 0x00000000005f6836 in _PyFunction_Vectorcall ()
#44 0x00000000005a7b41 in ?? ()
#45 0x000000000069e876 in ?? ()
#46 0x00000000005121b8 in PyObject_GetIter ()
#47 0x0000000000614891 in ?? ()
#48 0x00000000005f3d03 in _PyObject_MakeTpCall ()
#49 0x0000000000570266 in _PyEval_EvalFrameDefault ()
#50 0x00000000005f6836 in _PyFunction_Vectorcall ()
#51 0x000000000056b0ae in _PyEval_EvalFrameDefault ()
#52 0x000000000056939a in _PyEval_EvalCodeWithName ()
#53 0x000000000068d047 in PyEval_EvalCode ()
#54 0x000000000067e351 in ?? ()
#55 0x000000000067e3cf in ?? ()
#56 0x000000000067e471 in ?? ()
#57 0x000000000067e817 in PyRun_SimpleFileExFlags ()
#58 0x00000000006b6fe2 in Py_RunMain ()
#59 0x00000000006b736d in Py_BytesMain ()
#60 0x00007fca0aeff083 in __libc_start_main (main=0x4eead0 <main>, argc=13, argv=0x7ffecda1a858, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7ffecda1a848) at ../csu/libc-start.c:308
#61 0x00000000005fa5ce in _start ()