Erotemic/gist:e6be4658bad9fa28a56bf4e7c516a820

## gistfile1.txt
Timer unit: 1e-06 s


Pystone time: 0.05441 s
File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
Function: _get_boxes at line 202

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   202                                               @profiler.profile
   203                                               def _get_boxes(self, output, mode=1):
   204                                                   """
   205                                                   Returns array of detections for every image in batch
   206
   207                                                   Examples:
   208                                                       >>> import torch
   209                                                       >>> torch.random.manual_seed(0)
   210                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
   211                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
   212                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
   213                                                       >>> output = torch.randn(16, 125, 9, 9)
   214                                                       >>> from netharn import XPU
   215                                                       >>> output = XPU.cast('gpu').move(output)
   216                                                       >>> boxes = self._get_boxes(output.data)
   217                                                       >>> assert len(boxes) == 16
   218                                                       >>> assert all(len(b[0]) == 6 for b in boxes)
   219
   220                                                   Benchmark:
   221                                                       >>> from netharn.models.yolo2.light_postproc import *
   222                                                       >>> import torch
   223                                                       >>> torch.random.manual_seed(0)
   224                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
   225                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
   226                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
   227                                                       >>> output = torch.randn(16, 125, 9, 9)
   228                                                       >>> from netharn import XPU
   229                                                       >>> output = XPU.cast('gpu').move(output)
   230                                                       >>> for timer in ub.Timerit(100, bestof=10, label='mode 0'):
   231                                                       >>>     output_ = output.clone()
   232                                                       >>>     with timer:
   233                                                       >>>         boxes0 = self._get_boxes(output_.data, mode=0)
   234                                                       >>> for timer in ub.Timerit(100, bestof=10, label='mode 1'):
   235                                                       >>>     output_ = output.clone()
   236                                                       >>>     with timer:
   237                                                       >>>         boxes1 = self._get_boxes(output_.data, mode=1)
   238                                                       >>> for b0, b1 in zip(boxes0, boxes1):
   239                                                       >>>     assert np.all(b0.cpu() == b1.cpu())
   240
   241                                                       >>> from lightnet.data.postprocess import GetBoundingBoxes as GetBoundingBoxesOrig
   242                                                       >>> anchors_dict = dict(num=5, values=[1.3221,1.73145,3.19275,4.00944,5.05587,
   243                                                       >>>                               8.09892,9.47112,4.84053,11.2364,10.0071])
   244                                                       >>> post = GetBoundingBoxesOrig(anchors=anchors_dict, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
   245                                                       >>> for timer in ub.Timerit(100, bestof=10, label='original'):
   246                                                       >>>     output_ = output.clone()
   247                                                       >>>     with timer:
   248                                                       >>>         boxes3 = post._get_boxes(output_.data)
   249                                                       >>> # Check that the output is the same
   250                                                       >>> for b0, b3 in zip(boxes0, boxes3):
   251                                                       >>>     b3_ = torch.Tensor(b3)
   252                                                       >>>     assert np.all(b0.cpu() == b3_.cpu())
   253                                                   """
   254                                                   # dont modify inplace
   255        21        339.0     16.1      0.6          output = output.clone()
   256
   257                                                   # Check dimensions
   258        21        108.0      5.1      0.2          if output.dim() == 3:
   259                                                       output.unsqueeze_(0)
   260
   261                                                   # Variables
   262        21        118.0      5.6      0.2          cuda = output.is_cuda
   263        21        151.0      7.2      0.3          bsize = output.size(0)
   264        21         96.0      4.6      0.2          h = output.size(2)
   265        21         92.0      4.4      0.2          w = output.size(3)
   266
   267                                                   # Compute xc,yc, w,h, box_score on Tensor
   268        21        833.0     39.7      1.5          lin_x = torch.linspace(0, w - 1, w).repeat(h, 1).view(h * w)
   269        21        598.0     28.5      1.1          lin_y = torch.linspace(0, h - 1, h).repeat(w, 1).t().contiguous().view(h * w)
   270        21        575.0     27.4      1.1          anchor_w = torch.Tensor(self.anchors[::2]).view(1, self.num_anchors, 1)
   271        21        203.0      9.7      0.4          anchor_h = torch.Tensor(self.anchors[1::2]).view(
   272        21        184.0      8.8      0.3              1, self.num_anchors, 1)
   273        21        109.0      5.2      0.2          if cuda:
   274        21        656.0     31.2      1.2              lin_x = lin_x.cuda()
   275        21        338.0     16.1      0.6              lin_y = lin_y.cuda()
   276        21        314.0     15.0      0.6              anchor_w = anchor_w.cuda()
   277        21        304.0     14.5      0.6              anchor_h = anchor_h.cuda()
   278
   279                                                   # -1 == 5+num_classes (we can drop feature maps if 1 class)
   280        21        223.0     10.6      0.4          output_ = output.view(bsize, self.num_anchors, -1, h * w)
   281        21       1005.0     47.9      1.8          output_[:, :, 0, :].sigmoid_().add_(lin_x).div_(w)          # X center
   282        21        707.0     33.7      1.3          output_[:, :, 1, :].sigmoid_().add_(lin_y).div_(h)          # Y center
   283        21        761.0     36.2      1.4          output_[:, :, 2, :].exp_().mul_(anchor_w).div_(w)           # Width
   284        21        585.0     27.9      1.1          output_[:, :, 3, :].exp_().mul_(anchor_h).div_(h)           # Height
   285        21        309.0     14.7      0.6          output_[:, :, 4, :].sigmoid_()                              # Box score
   286
   287                                                   # Compute class_score
   288        21        101.0      4.8      0.2          if self.num_classes > 1:
   289        21        139.0      6.6      0.3              if torch.__version__.startswith('0.3'):
   290                                                           cls_scores = torch.nn.functional.softmax(
   291                                                               Variable(output_[:, :, 5:, :], volatile=True), 2).data
   292                                                       else:
   293        21        125.0      6.0      0.2                  cls_scores = torch.nn.functional.softmax(
   294        21       1123.0     53.5      2.1                      output_[:, :, 5:, :], 2)
   295        21        587.0     28.0      1.1              cls_max, cls_max_idx = torch.max(cls_scores, 2)
   296        21        428.0     20.4      0.8              cls_max.mul_(output_[:, :, 4, :])
   297                                                   else:
   298                                                       cls_max = output_[:, :, 4, :]
   299                                                       cls_max_idx = torch.zeros_like(cls_max)
   300
   301                                                   # Save detection if conf*class_conf is higher than threshold
   302
   303        21         95.0      4.5      0.2          if mode == 0:
   304                                                       output_ = output_.cpu()
   305                                                       cls_max = cls_max.cpu()
   306                                                       cls_max_idx = cls_max_idx.cpu()
   307                                                       boxes = []
   308                                                       for b in range(bsize):
   309                                                           box_batch = []
   310                                                           for a in range(self.num_anchors):
   311                                                               for i in range(h * w):
   312                                                                   if cls_max[b, a, i] > self.conf_thresh:
   313                                                                       box_batch.append([
   314                                                                           output_[b, a, 0, i],
   315                                                                           output_[b, a, 1, i],
   316                                                                           output_[b, a, 2, i],
   317                                                                           output_[b, a, 3, i],
   318                                                                           cls_max[b, a, i],
   319                                                                           cls_max_idx[b, a, i]
   320                                                                       ])
   321                                                           box_batch = torch.Tensor(box_batch)
   322                                                           boxes.append(box_batch)
   323        21         89.0      4.2      0.2          elif mode == 1 or mode == 2:
   324                                                       # Save detection if conf*class_conf is higher than threshold
   325        21        572.0     27.2      1.1              flags = cls_max > self.conf_thresh
   326        21        272.0     13.0      0.5              flat_flags = flags.view(-1)
   327
   328        21       1915.0     91.2      3.5              if not np.any(flat_flags):
   329                                                           return [torch.FloatTensor([]) for _ in range(bsize)]
   330
   331                                                       # number of potential detections per batch
   332        21        797.0     38.0      1.5              item_size = np.prod(flags.shape[1:])
   333        21        108.0      5.1      0.2              slices = [slice((item_size * i), (item_size * (i + 1)))
   334        21        415.0     19.8      0.8                        for i in range(bsize)]
   335                                                       # number of detections per batch (prepended with a zero)
   336        21        108.0      5.1      0.2              n_dets = torch.stack(
   337        21      14893.0    709.2     27.4                  [flat_flags[0].long() * 0] + [flat_flags[sl].long().sum() for sl in slices])
   338                                                       # indices of splits between filtered detections
   339        21       1523.0     72.5      2.8              filtered_split_idxs = torch.cumsum(n_dets, dim=0)
   340
   341                                                       # Do actual filtering of detections by confidence thresh
   342        21        710.0     33.8      1.3              flat_coords = output_.transpose(2, 3)[..., 0:4].clone().view(-1, 4)
   343        21        159.0      7.6      0.3              flat_class_max = cls_max.view(-1)
   344        21        167.0      8.0      0.3              flat_class_idx = cls_max_idx.view(-1)
   345
   346        21       2837.0    135.1      5.2              coords = flat_coords[flat_flags]
   347        21       1990.0     94.8      3.7              scores = flat_class_max[flat_flags]
   348        21       1443.0     68.7      2.7              cls_idxs = flat_class_idx[flat_flags]
   349
   350        21        251.0     12.0      0.5              filtered_dets = torch.cat([coords, scores[:, None],
   351        21       1032.0     49.1      1.9                                         cls_idxs[:, None].float()], dim=1)
   352
   353        21        100.0      4.8      0.2              boxes2 = []
   354       357       3904.0     10.9      7.2              for lx, rx in zip(filtered_split_idxs, filtered_split_idxs[1:]):
   355       336       8135.0     24.2     15.0                  batch_box = filtered_dets[lx:rx]
   356       336       1604.0      4.8      2.9                  boxes2.append(batch_box)
   357
   358        21         91.0      4.3      0.2              boxes = boxes2
   359
   360        21         89.0      4.2      0.2          return boxes


Pystone time: 0.094206 s
File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
Function: _clip_boxes at line 139

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   139                                               @profiler.profile
   140                                               def _clip_boxes(self, box):
   141                                                   """
   142                                                   CommandLine:
   143                                                       python ~/code/netharn/netharn/models/yolo2/light_postproc.py GetBoundingBoxes._clip_boxes
   144
   145                                                   Example:
   146                                                       >>> import torch
   147                                                       >>> torch.random.manual_seed(0)
   148                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
   149                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
   150                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
   151                                                       >>> # Make random boxes for one item in a batch
   152                                                       >>> box = torch.randn(7, 6)
   153                                                       >>> box[:, 0].sigmoid_()
   154                                                       >>> box[:, 1].sigmoid_()
   155                                                       >>> box.abs_()
   156                                                       >>> new_box = self._clip_boxes(box)
   157                                                       >>> box_tlbr = util.Boxes(box.cpu().numpy()[:, 0:4], 'cxywh').as_tlbr().data
   158                                                       >>> new_tlbr = util.Boxes(new_box.cpu().numpy()[:, 0:4], 'cxywh').as_tlbr().data
   159                                                       >>> #
   160                                                       >>> print('old')
   161                                                       >>> print(box_tlbr)
   162                                                       >>> print('new')
   163                                                       >>> print(new_tlbr)
   164                                                       >>> #print('trim_w = {}'.format(ub.repr2(trim_w.numpy(), precision=4)))
   165                                                       >>> #print('trim_h = {}'.format(ub.repr2(trim_h.numpy(), precision=4)))
   166                                                       >>> assert np.all(new_tlbr.T[2] <= 1.01)
   167                                                       >>> assert np.all(new_tlbr.T[2] >= -0.01)
   168                                                       >>> assert np.all(new_tlbr.T[3] <= 1.01)
   169                                                       >>> assert np.all(new_tlbr.T[3] >= -0.01)
   170                                                   """
   171       336       1332.0      4.0      1.4          if len(box) == 0:
   172                                                       return box
   173
   174       336       6518.0     19.4      6.9          cx, cy, w, h = box.t()[0:4]
   175
   176       336       7328.0     21.8      7.8          x1 = cx - (w / 2)
   177       336       5956.0     17.7      6.3          x2 = cx + (w / 2)
   178
   179       336       5686.0     16.9      6.0          y1 = cy - (h / 2)
   180       336       5466.0     16.3      5.8          y2 = cy + (h / 2)
   181
   182       336       9010.0     26.8      9.6          trim_w1 = (0 - x1).clamp(0, None)
   183       336       5525.0     16.4      5.9          trim_w2 = (x2 - 1).clamp(0, None)
   184                                                   # multiply by 2 because we are trimming from both sides
   185       336       6106.0     18.2      6.5          trim_w = torch.max(trim_w1, trim_w2) * 2
   186
   187       336       8363.0     24.9      8.9          trim_h1 = (0 - y1).clamp(0, None)
   188       336       5343.0     15.9      5.7          trim_h2 = (y2 - 1).clamp(0, None)
   189       336       5673.0     16.9      6.0          trim_h = torch.max(trim_h1, trim_h2) * 2
   190
   191       336       3806.0     11.3      4.0          new_box = box.clone()
   192       336       9629.0     28.7     10.2          new_box[:, 2] = new_box[:, 2] - trim_w
   193       336       7962.0     23.7      8.5          new_box[:, 3] = new_box[:, 3] - trim_h
   194       336        503.0      1.5      0.5          return new_box


Pystone time: 3.45784 s
File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
Function: _nms at line 362

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   362                                               @profiler.profile
   363                                               def _nms(self, boxes, mode=1):
   364                                                   """ Non maximum suppression.
   365                                                   Source: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
   366
   367                                                   Args:
   368                                                     boxes (tensor): Bounding boxes from get_detections
   369
   370                                                   Return:
   371                                                     (tensor): Pruned boxes
   372
   373                                                   CommandLine:
   374                                                       python -m netharn.models.yolo2.light_postproc GetBoundingBoxes._nms --profile
   375
   376                                                   Examples:
   377                                                       >>> import torch
   378                                                       >>> torch.random.manual_seed(0)
   379                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
   380                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
   381                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.01, nms_thresh=0.5)
   382                                                       >>> output = torch.randn(8, 125, 9, 9)
   383                                                       >>> boxes_ = self._get_boxes(output.data)
   384                                                       >>> from netharn import util
   385                                                       >>> boxes = torch.Tensor(boxes_[0])
   386                                                       >>> scores = boxes[..., 4:5]
   387                                                       >>> classes = boxes[..., 5:6]
   388                                                       >>> cxywh = util.Boxes(boxes[..., 0:4], 'cxywh')
   389                                                       >>> tlbr = cxywh.as_tlbr()
   390                                                       >>> util.non_max_supression(tlbr.data.numpy(), scores.numpy().ravel(), self.nms_thresh)
   391                                                       >>> self._nms(boxes, mode=0)
   392                                                       >>> self._nms(boxes, mode=1)
   393
   394                                                   Benchmark:
   395                                                       boxes = torch.Tensor(boxes_[0])
   396                                                       import ubelt
   397                                                       for timer in ubelt.Timerit(100, bestof=10, label='nms0+cpu'):
   398                                                           with timer:
   399                                                               self._nms(boxes, mode=0)
   400
   401                                                       for timer in ubelt.Timerit(100, bestof=10, label='nms1+cpu'):
   402                                                           with timer:
   403                                                               self._nms(boxes, mode=1)
   404
   405                                                       boxes = boxes.cuda()
   406                                                       import ubelt
   407                                                       for timer in ubelt.Timerit(100, bestof=10, label='nms0+gpu'):
   408                                                           with timer:
   409                                                               self._nms(boxes, mode=0)
   410
   411                                                       for timer in ubelt.Timerit(100, bestof=10, label='nms1+gpu'):
   412                                                           with timer:
   413                                                               self._nms(boxes, mode=1)
   414                                                   """
   415       336       2144.0      6.4      0.1          if boxes.numel() == 0:
   416                                                       return boxes
   417
   418       336       3259.0      9.7      0.1          a = boxes[:, :2]
   419       336       1763.0      5.2      0.1          b = boxes[:, 2:4]
   420       336      27196.0     80.9      0.8          bboxes = torch.cat([a - b / 2, a + b / 2], 1)
   421       336       2762.0      8.2      0.1          scores = boxes[:, 4]
   422
   423       336       1038.0      3.1      0.0          if mode == 1:
   424       336      18579.0     55.3      0.5              bboxes = bboxes.cpu().numpy().astype(np.float32)
   425       336      12181.0     36.3      0.4              scores = scores.cpu().numpy().astype(np.float32)
   426       336      12435.0     37.0      0.4              classes = boxes[..., 5].cpu().numpy().astype(np.int)
   427       336       1158.0      3.4      0.0              keep = []
   428      7035      57129.0      8.1      1.7              for idxs in ub.group_items(range(len(classes)), classes).values():
   429      6699      58671.0      8.8      1.7                  cls_boxes = bboxes.take(idxs, axis=0)
   430      6699      35854.0      5.4      1.0                  cls_scores = scores.take(idxs, axis=0)
   431      6699      21426.0      3.2      0.6                  cls_keep = util.non_max_supression(cls_boxes, cls_scores,
   432      6699    3078737.0    459.6     89.0                                                     self.nms_thresh)
   433      6699      70162.0     10.5      2.0                  keep.extend(list(ub.take(idxs, cls_keep)))
   434       336       4602.0     13.7      0.1              keep = sorted(keep)
   435       336      48744.0    145.1      1.4              return boxes[torch.LongTensor(keep)]
   436                                                   elif mode == 0 or mode == 2:
   437                                                       # if torch.cuda.is_available:
   438                                                       #     boxes = boxes.cuda()
   439
   440                                                       x1 = bboxes[:, 0]
   441                                                       y1 = bboxes[:, 1]
   442                                                       x2 = bboxes[:, 2]
   443                                                       y2 = bboxes[:, 3]
   444
   445                                                       areas = ((x2 - x1) * (y2 - y1))
   446                                                       _, order = scores.sort(0, descending=True)
   447
   448                                                       keep = []
   449                                                       while order.numel() > 0:
   450                                                           if order.numel() == 1:
   451                                                               if torch.__version__.startswith('0.3'):
   452                                                                   i = order[0]
   453                                                               else:
   454                                                                   i = order.item()
   455                                                               i = order.item()
   456                                                               keep.append(i)
   457                                                               break
   458
   459                                                           i = order[0]
   460                                                           keep.append(i)
   461
   462                                                           xx1 = x1[order[1:]].clamp(min=x1[i])
   463                                                           yy1 = y1[order[1:]].clamp(min=y1[i])
   464                                                           xx2 = x2[order[1:]].clamp(max=x2[i])
   465                                                           yy2 = y2[order[1:]].clamp(max=y2[i])
   466
   467                                                           w = (xx2 - xx1).clamp(min=0)
   468                                                           h = (yy2 - yy1).clamp(min=0)
   469                                                           inter = w * h
   470
   471                                                           iou = inter / (areas[i] + areas[order[1:]] - inter)
   472
   473                                                           ids = (iou <= self.nms_thresh).nonzero().squeeze()
   474                                                           if ids.numel() == 0:
   475                                                               break
   476                                                           order = order[ids + 1]
   477                                                       return boxes[torch.LongTensor(keep)]


Pystone time: 3.78105 s
File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
Function: __call__ at line 44

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    44                                               @profiler.profile
    45                                               def __call__(self, network_output, mode=1):
    46                                                   """ Compute bounding boxes after thresholding and nms
    47
    48                                                       network_output (torch.autograd.Variable): Output tensor from the lightnet network
    49
    50                                                   # Examples:
    51                                                   #     >>> import torch
    52                                                   #     >>> torch.random.manual_seed(0)
    53                                                   #     >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
    54                                                   #     >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
    55                                                   #     >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
    56                                                   #     >>> output = torch.randn(8, 125, 9, 9)
    57                                                   #     >>> boxes = self(output)
    58                                                   #     >>> assert len(boxes) == 8
    59                                                   #     >>> assert all(b.shape[1] == 6 for b in boxes)
    60
    61                                                   CommandLine:
    62                                                       python -m netharn.models.yolo2.light_postproc GetBoundingBoxes.__call__:1 --profile
    63                                                       python -m netharn.models.yolo2.light_postproc GetBoundingBoxes.__call__:2 --profile
    64
    65                                                   Example:
    66                                                       >>> import torch
    67                                                       >>> torch.random.manual_seed(0)
    68                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
    69                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
    70                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
    71                                                       >>> import ubelt
    72                                                       >>> output = torch.randn(16, 125, 9, 9)
    73                                                       >>> output = output.cuda()
    74                                                       >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+gpu'):
    75                                                       >>>     output_ = output.clone()
    76                                                       >>>     with timer:
    77                                                       >>>         self(output_, mode=0)
    78
    79                                                   Example:
    80                                                       >>> import torch
    81                                                       >>> torch.random.manual_seed(0)
    82                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
    83                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
    84                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
    85                                                       >>> import ubelt
    86                                                       >>> output = torch.randn(16, 125, 9, 9)
    87                                                       >>> output = output.cuda()
    88                                                       >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+gpu'):
    89                                                       >>>     output_ = output.clone()
    90                                                       >>>     with timer:
    91                                                       >>>         self(output_, mode=1)
    92
    93                                                   Benchmark:
    94                                                       >>> import torch
    95                                                       >>> torch.random.manual_seed(0)
    96                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
    97                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
    98                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
    99                                                       >>> import ubelt
   100                                                       >>> output = torch.randn(16, 125, 9, 9)
   101                                                       >>> #
   102                                                       >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+cpu'):
   103                                                       >>>     output_ = output.clone()
   104                                                       >>>     with timer:
   105                                                       >>>         self(output_, mode=0)
   106                                                       >>> #
   107                                                       >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+cpu'):
   108                                                       >>>     output_ = output.clone()
   109                                                       >>>     with timer:
   110                                                       >>>         self(output_, mode=1)
   111                                                       >>> #
   112                                                       >>> output = output.cuda()
   113                                                       >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+gpu'):
   114                                                       >>>     output_ = output.clone()
   115                                                       >>>     with timer:
   116                                                       >>>         self(output_, mode=0)
   117                                                       >>> #
   118                                                       >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+gpu'):
   119                                                       >>>     output_ = output.clone()
   120                                                       >>>     with timer:
   121                                                       >>>         self(output_, mode=1)
   122                                                       >>> #for timer in ubelt.Timerit(21, bestof=3, label='mode2+gpu'):
   123                                                       >>> #    output_ = output.clone()
   124                                                       >>> #    with timer:
   125                                                       >>> #        self(output_, mode=2)
   126
   127                                                       %timeit self(output.data, mode=0)
   128                                                       %timeit self(output.data, mode=1)
   129                                                       %timeit self(output.data, mode=2)
   130                                                   """
   131        21      65289.0   3109.0      1.7          boxes = self._get_boxes(network_output.data, mode=mode)
   132        21    3609713.0 171891.1     95.5          boxes = [self._nms(box, mode=mode) for box in boxes]
   133
   134                                                   # force all boxes to be inside the image
   135        21     105953.0   5045.4      2.8          boxes = [self._clip_boxes(box) for box in boxes]
   136        21         51.0      2.4      0.0          postout = boxes
   137        21         46.0      2.2      0.0          return postout


 -1.00 seconds - None                        :None:None
  0.05 seconds - GetBoundingBoxes._get_boxes :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:202
  0.09 seconds - GetBoundingBoxes._clip_boxes:/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:139
  3.46 seconds - GetBoundingBoxes._nms       :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:362
  3.78 seconds - GetBoundingBoxes.__call__   :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:44
	Timer unit: 1e-06 s


	Pystone time: 0.05441 s
	File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
	Function: _get_boxes at line 202

	Line # Hits Time Per Hit % Time Line Contents
	==============================================================
	202 @profiler.profile
	203 def _get_boxes(self, output, mode=1):
	204 """
	205 Returns array of detections for every image in batch
	206
	207 Examples:
	208 >>> import torch
	209 >>> torch.random.manual_seed(0)
	210 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	211 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	212 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	213 >>> output = torch.randn(16, 125, 9, 9)
	214 >>> from netharn import XPU
	215 >>> output = XPU.cast('gpu').move(output)
	216 >>> boxes = self._get_boxes(output.data)
	217 >>> assert len(boxes) == 16
	218 >>> assert all(len(b[0]) == 6 for b in boxes)
	219
	220 Benchmark:
	221 >>> from netharn.models.yolo2.light_postproc import *
	222 >>> import torch
	223 >>> torch.random.manual_seed(0)
	224 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	225 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	226 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	227 >>> output = torch.randn(16, 125, 9, 9)
	228 >>> from netharn import XPU
	229 >>> output = XPU.cast('gpu').move(output)
	230 >>> for timer in ub.Timerit(100, bestof=10, label='mode 0'):
	231 >>> output_ = output.clone()
	232 >>> with timer:
	233 >>> boxes0 = self._get_boxes(output_.data, mode=0)
	234 >>> for timer in ub.Timerit(100, bestof=10, label='mode 1'):
	235 >>> output_ = output.clone()
	236 >>> with timer:
	237 >>> boxes1 = self._get_boxes(output_.data, mode=1)
	238 >>> for b0, b1 in zip(boxes0, boxes1):
	239 >>> assert np.all(b0.cpu() == b1.cpu())
	240
	241 >>> from lightnet.data.postprocess import GetBoundingBoxes as GetBoundingBoxesOrig
	242 >>> anchors_dict = dict(num=5, values=[1.3221,1.73145,3.19275,4.00944,5.05587,
	243 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	244 >>> post = GetBoundingBoxesOrig(anchors=anchors_dict, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	245 >>> for timer in ub.Timerit(100, bestof=10, label='original'):
	246 >>> output_ = output.clone()
	247 >>> with timer:
	248 >>> boxes3 = post._get_boxes(output_.data)
	249 >>> # Check that the output is the same
	250 >>> for b0, b3 in zip(boxes0, boxes3):
	251 >>> b3_ = torch.Tensor(b3)
	252 >>> assert np.all(b0.cpu() == b3_.cpu())
	253 """
	254 # dont modify inplace
	255 21 339.0 16.1 0.6 output = output.clone()
	256
	257 # Check dimensions
	258 21 108.0 5.1 0.2 if output.dim() == 3:
	259 output.unsqueeze_(0)
	260
	261 # Variables
	262 21 118.0 5.6 0.2 cuda = output.is_cuda
	263 21 151.0 7.2 0.3 bsize = output.size(0)
	264 21 96.0 4.6 0.2 h = output.size(2)
	265 21 92.0 4.4 0.2 w = output.size(3)
	266
	267 # Compute xc,yc, w,h, box_score on Tensor
	268 21 833.0 39.7 1.5 lin_x = torch.linspace(0, w - 1, w).repeat(h, 1).view(h * w)
	269 21 598.0 28.5 1.1 lin_y = torch.linspace(0, h - 1, h).repeat(w, 1).t().contiguous().view(h * w)
	270 21 575.0 27.4 1.1 anchor_w = torch.Tensor(self.anchors[::2]).view(1, self.num_anchors, 1)
	271 21 203.0 9.7 0.4 anchor_h = torch.Tensor(self.anchors[1::2]).view(
	272 21 184.0 8.8 0.3 1, self.num_anchors, 1)
	273 21 109.0 5.2 0.2 if cuda:
	274 21 656.0 31.2 1.2 lin_x = lin_x.cuda()
	275 21 338.0 16.1 0.6 lin_y = lin_y.cuda()
	276 21 314.0 15.0 0.6 anchor_w = anchor_w.cuda()
	277 21 304.0 14.5 0.6 anchor_h = anchor_h.cuda()
	278
	279 # -1 == 5+num_classes (we can drop feature maps if 1 class)
	280 21 223.0 10.6 0.4 output_ = output.view(bsize, self.num_anchors, -1, h * w)
	281 21 1005.0 47.9 1.8 output_[:, :, 0, :].sigmoid_().add_(lin_x).div_(w) # X center
	282 21 707.0 33.7 1.3 output_[:, :, 1, :].sigmoid_().add_(lin_y).div_(h) # Y center
	283 21 761.0 36.2 1.4 output_[:, :, 2, :].exp_().mul_(anchor_w).div_(w) # Width
	284 21 585.0 27.9 1.1 output_[:, :, 3, :].exp_().mul_(anchor_h).div_(h) # Height
	285 21 309.0 14.7 0.6 output_[:, :, 4, :].sigmoid_() # Box score
	286
	287 # Compute class_score
	288 21 101.0 4.8 0.2 if self.num_classes > 1:
	289 21 139.0 6.6 0.3 if torch.__version__.startswith('0.3'):
	290 cls_scores = torch.nn.functional.softmax(
	291 Variable(output_[:, :, 5:, :], volatile=True), 2).data
	292 else:
	293 21 125.0 6.0 0.2 cls_scores = torch.nn.functional.softmax(
	294 21 1123.0 53.5 2.1 output_[:, :, 5:, :], 2)
	295 21 587.0 28.0 1.1 cls_max, cls_max_idx = torch.max(cls_scores, 2)
	296 21 428.0 20.4 0.8 cls_max.mul_(output_[:, :, 4, :])
	297 else:
	298 cls_max = output_[:, :, 4, :]
	299 cls_max_idx = torch.zeros_like(cls_max)
	300
	301 # Save detection if conf*class_conf is higher than threshold
	302
	303 21 95.0 4.5 0.2 if mode == 0:
	304 output_ = output_.cpu()
	305 cls_max = cls_max.cpu()
	306 cls_max_idx = cls_max_idx.cpu()
	307 boxes = []
	308 for b in range(bsize):
	309 box_batch = []
	310 for a in range(self.num_anchors):
	311 for i in range(h * w):
	312 if cls_max[b, a, i] > self.conf_thresh:
	313 box_batch.append([
	314 output_[b, a, 0, i],
	315 output_[b, a, 1, i],
	316 output_[b, a, 2, i],
	317 output_[b, a, 3, i],
	318 cls_max[b, a, i],
	319 cls_max_idx[b, a, i]
	320 ])
	321 box_batch = torch.Tensor(box_batch)
	322 boxes.append(box_batch)
	323 21 89.0 4.2 0.2 elif mode == 1 or mode == 2:
	324 # Save detection if conf*class_conf is higher than threshold
	325 21 572.0 27.2 1.1 flags = cls_max > self.conf_thresh
	326 21 272.0 13.0 0.5 flat_flags = flags.view(-1)
	327
	328 21 1915.0 91.2 3.5 if not np.any(flat_flags):
	329 return [torch.FloatTensor([]) for _ in range(bsize)]
	330
	331 # number of potential detections per batch
	332 21 797.0 38.0 1.5 item_size = np.prod(flags.shape[1:])
	333 21 108.0 5.1 0.2 slices = [slice((item_size * i), (item_size * (i + 1)))
	334 21 415.0 19.8 0.8 for i in range(bsize)]
	335 # number of detections per batch (prepended with a zero)
	336 21 108.0 5.1 0.2 n_dets = torch.stack(
	337 21 14893.0 709.2 27.4 [flat_flags[0].long() * 0] + [flat_flags[sl].long().sum() for sl in slices])
	338 # indices of splits between filtered detections
	339 21 1523.0 72.5 2.8 filtered_split_idxs = torch.cumsum(n_dets, dim=0)
	340
	341 # Do actual filtering of detections by confidence thresh
	342 21 710.0 33.8 1.3 flat_coords = output_.transpose(2, 3)[..., 0:4].clone().view(-1, 4)
	343 21 159.0 7.6 0.3 flat_class_max = cls_max.view(-1)
	344 21 167.0 8.0 0.3 flat_class_idx = cls_max_idx.view(-1)
	345
	346 21 2837.0 135.1 5.2 coords = flat_coords[flat_flags]
	347 21 1990.0 94.8 3.7 scores = flat_class_max[flat_flags]
	348 21 1443.0 68.7 2.7 cls_idxs = flat_class_idx[flat_flags]
	349
	350 21 251.0 12.0 0.5 filtered_dets = torch.cat([coords, scores[:, None],
	351 21 1032.0 49.1 1.9 cls_idxs[:, None].float()], dim=1)
	352
	353 21 100.0 4.8 0.2 boxes2 = []
	354 357 3904.0 10.9 7.2 for lx, rx in zip(filtered_split_idxs, filtered_split_idxs[1:]):
	355 336 8135.0 24.2 15.0 batch_box = filtered_dets[lx:rx]
	356 336 1604.0 4.8 2.9 boxes2.append(batch_box)
	357
	358 21 91.0 4.3 0.2 boxes = boxes2
	359
	360 21 89.0 4.2 0.2 return boxes


	Pystone time: 0.094206 s
	File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
	Function: _clip_boxes at line 139

	Line # Hits Time Per Hit % Time Line Contents
	==============================================================
	139 @profiler.profile
	140 def _clip_boxes(self, box):
	141 """
	142 CommandLine:
	143 python ~/code/netharn/netharn/models/yolo2/light_postproc.py GetBoundingBoxes._clip_boxes
	144
	145 Example:
	146 >>> import torch
	147 >>> torch.random.manual_seed(0)
	148 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	149 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	150 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	151 >>> # Make random boxes for one item in a batch
	152 >>> box = torch.randn(7, 6)
	153 >>> box[:, 0].sigmoid_()
	154 >>> box[:, 1].sigmoid_()
	155 >>> box.abs_()
	156 >>> new_box = self._clip_boxes(box)
	157 >>> box_tlbr = util.Boxes(box.cpu().numpy()[:, 0:4], 'cxywh').as_tlbr().data
	158 >>> new_tlbr = util.Boxes(new_box.cpu().numpy()[:, 0:4], 'cxywh').as_tlbr().data
	159 >>> #
	160 >>> print('old')
	161 >>> print(box_tlbr)
	162 >>> print('new')
	163 >>> print(new_tlbr)
	164 >>> #print('trim_w = {}'.format(ub.repr2(trim_w.numpy(), precision=4)))
	165 >>> #print('trim_h = {}'.format(ub.repr2(trim_h.numpy(), precision=4)))
	166 >>> assert np.all(new_tlbr.T[2] <= 1.01)
	167 >>> assert np.all(new_tlbr.T[2] >= -0.01)
	168 >>> assert np.all(new_tlbr.T[3] <= 1.01)
	169 >>> assert np.all(new_tlbr.T[3] >= -0.01)
	170 """
	171 336 1332.0 4.0 1.4 if len(box) == 0:
	172 return box
	173
	174 336 6518.0 19.4 6.9 cx, cy, w, h = box.t()[0:4]
	175
	176 336 7328.0 21.8 7.8 x1 = cx - (w / 2)
	177 336 5956.0 17.7 6.3 x2 = cx + (w / 2)
	178
	179 336 5686.0 16.9 6.0 y1 = cy - (h / 2)
	180 336 5466.0 16.3 5.8 y2 = cy + (h / 2)
	181
	182 336 9010.0 26.8 9.6 trim_w1 = (0 - x1).clamp(0, None)
	183 336 5525.0 16.4 5.9 trim_w2 = (x2 - 1).clamp(0, None)
	184 # multiply by 2 because we are trimming from both sides
	185 336 6106.0 18.2 6.5 trim_w = torch.max(trim_w1, trim_w2) * 2
	186
	187 336 8363.0 24.9 8.9 trim_h1 = (0 - y1).clamp(0, None)
	188 336 5343.0 15.9 5.7 trim_h2 = (y2 - 1).clamp(0, None)
	189 336 5673.0 16.9 6.0 trim_h = torch.max(trim_h1, trim_h2) * 2
	190
	191 336 3806.0 11.3 4.0 new_box = box.clone()
	192 336 9629.0 28.7 10.2 new_box[:, 2] = new_box[:, 2] - trim_w
	193 336 7962.0 23.7 8.5 new_box[:, 3] = new_box[:, 3] - trim_h
	194 336 503.0 1.5 0.5 return new_box


	Pystone time: 3.45784 s
	File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
	Function: _nms at line 362

	Line # Hits Time Per Hit % Time Line Contents
	==============================================================
	362 @profiler.profile
	363 def _nms(self, boxes, mode=1):
	364 """ Non maximum suppression.
	365 Source: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
	366
	367 Args:
	368 boxes (tensor): Bounding boxes from get_detections
	369
	370 Return:
	371 (tensor): Pruned boxes
	372
	373 CommandLine:
	374 python -m netharn.models.yolo2.light_postproc GetBoundingBoxes._nms --profile
	375
	376 Examples:
	377 >>> import torch
	378 >>> torch.random.manual_seed(0)
	379 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	380 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	381 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.01, nms_thresh=0.5)
	382 >>> output = torch.randn(8, 125, 9, 9)
	383 >>> boxes_ = self._get_boxes(output.data)
	384 >>> from netharn import util
	385 >>> boxes = torch.Tensor(boxes_[0])
	386 >>> scores = boxes[..., 4:5]
	387 >>> classes = boxes[..., 5:6]
	388 >>> cxywh = util.Boxes(boxes[..., 0:4], 'cxywh')
	389 >>> tlbr = cxywh.as_tlbr()
	390 >>> util.non_max_supression(tlbr.data.numpy(), scores.numpy().ravel(), self.nms_thresh)
	391 >>> self._nms(boxes, mode=0)
	392 >>> self._nms(boxes, mode=1)
	393
	394 Benchmark:
	395 boxes = torch.Tensor(boxes_[0])
	396 import ubelt
	397 for timer in ubelt.Timerit(100, bestof=10, label='nms0+cpu'):
	398 with timer:
	399 self._nms(boxes, mode=0)
	400
	401 for timer in ubelt.Timerit(100, bestof=10, label='nms1+cpu'):
	402 with timer:
	403 self._nms(boxes, mode=1)
	404
	405 boxes = boxes.cuda()
	406 import ubelt
	407 for timer in ubelt.Timerit(100, bestof=10, label='nms0+gpu'):
	408 with timer:
	409 self._nms(boxes, mode=0)
	410
	411 for timer in ubelt.Timerit(100, bestof=10, label='nms1+gpu'):
	412 with timer:
	413 self._nms(boxes, mode=1)
	414 """
	415 336 2144.0 6.4 0.1 if boxes.numel() == 0:
	416 return boxes
	417
	418 336 3259.0 9.7 0.1 a = boxes[:, :2]
	419 336 1763.0 5.2 0.1 b = boxes[:, 2:4]
	420 336 27196.0 80.9 0.8 bboxes = torch.cat([a - b / 2, a + b / 2], 1)
	421 336 2762.0 8.2 0.1 scores = boxes[:, 4]
	422
	423 336 1038.0 3.1 0.0 if mode == 1:
	424 336 18579.0 55.3 0.5 bboxes = bboxes.cpu().numpy().astype(np.float32)
	425 336 12181.0 36.3 0.4 scores = scores.cpu().numpy().astype(np.float32)
	426 336 12435.0 37.0 0.4 classes = boxes[..., 5].cpu().numpy().astype(np.int)
	427 336 1158.0 3.4 0.0 keep = []
	428 7035 57129.0 8.1 1.7 for idxs in ub.group_items(range(len(classes)), classes).values():
	429 6699 58671.0 8.8 1.7 cls_boxes = bboxes.take(idxs, axis=0)
	430 6699 35854.0 5.4 1.0 cls_scores = scores.take(idxs, axis=0)
	431 6699 21426.0 3.2 0.6 cls_keep = util.non_max_supression(cls_boxes, cls_scores,
	432 6699 3078737.0 459.6 89.0 self.nms_thresh)
	433 6699 70162.0 10.5 2.0 keep.extend(list(ub.take(idxs, cls_keep)))
	434 336 4602.0 13.7 0.1 keep = sorted(keep)
	435 336 48744.0 145.1 1.4 return boxes[torch.LongTensor(keep)]
	436 elif mode == 0 or mode == 2:
	437 # if torch.cuda.is_available:
	438 # boxes = boxes.cuda()
	439
	440 x1 = bboxes[:, 0]
	441 y1 = bboxes[:, 1]
	442 x2 = bboxes[:, 2]
	443 y2 = bboxes[:, 3]
	444
	445 areas = ((x2 - x1) * (y2 - y1))
	446 _, order = scores.sort(0, descending=True)
	447
	448 keep = []
	449 while order.numel() > 0:
	450 if order.numel() == 1:
	451 if torch.__version__.startswith('0.3'):
	452 i = order[0]
	453 else:
	454 i = order.item()
	455 i = order.item()
	456 keep.append(i)
	457 break
	458
	459 i = order[0]
	460 keep.append(i)
	461
	462 xx1 = x1[order[1:]].clamp(min=x1[i])
	463 yy1 = y1[order[1:]].clamp(min=y1[i])
	464 xx2 = x2[order[1:]].clamp(max=x2[i])
	465 yy2 = y2[order[1:]].clamp(max=y2[i])
	466
	467 w = (xx2 - xx1).clamp(min=0)
	468 h = (yy2 - yy1).clamp(min=0)
	469 inter = w * h
	470
	471 iou = inter / (areas[i] + areas[order[1:]] - inter)
	472
	473 ids = (iou <= self.nms_thresh).nonzero().squeeze()
	474 if ids.numel() == 0:
	475 break
	476 order = order[ids + 1]
	477 return boxes[torch.LongTensor(keep)]


	Pystone time: 3.78105 s
	File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
	Function: __call__ at line 44

	Line # Hits Time Per Hit % Time Line Contents
	==============================================================
	44 @profiler.profile
	45 def __call__(self, network_output, mode=1):
	46 """ Compute bounding boxes after thresholding and nms
	47
	48 network_output (torch.autograd.Variable): Output tensor from the lightnet network
	49
	50 # Examples:
	51 # >>> import torch
	52 # >>> torch.random.manual_seed(0)
	53 # >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	54 # >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	55 # >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	56 # >>> output = torch.randn(8, 125, 9, 9)
	57 # >>> boxes = self(output)
	58 # >>> assert len(boxes) == 8
	59 # >>> assert all(b.shape[1] == 6 for b in boxes)
	60
	61 CommandLine:
	62 python -m netharn.models.yolo2.light_postproc GetBoundingBoxes.__call__:1 --profile
	63 python -m netharn.models.yolo2.light_postproc GetBoundingBoxes.__call__:2 --profile
	64
	65 Example:
	66 >>> import torch
	67 >>> torch.random.manual_seed(0)
	68 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	69 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	70 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	71 >>> import ubelt
	72 >>> output = torch.randn(16, 125, 9, 9)
	73 >>> output = output.cuda()
	74 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+gpu'):
	75 >>> output_ = output.clone()
	76 >>> with timer:
	77 >>> self(output_, mode=0)
	78
	79 Example:
	80 >>> import torch
	81 >>> torch.random.manual_seed(0)
	82 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	83 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	84 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	85 >>> import ubelt
	86 >>> output = torch.randn(16, 125, 9, 9)
	87 >>> output = output.cuda()
	88 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+gpu'):
	89 >>> output_ = output.clone()
	90 >>> with timer:
	91 >>> self(output_, mode=1)
	92
	93 Benchmark:
	94 >>> import torch
	95 >>> torch.random.manual_seed(0)
	96 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	97 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	98 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	99 >>> import ubelt
	100 >>> output = torch.randn(16, 125, 9, 9)
	101 >>> #
	102 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+cpu'):
	103 >>> output_ = output.clone()
	104 >>> with timer:
	105 >>> self(output_, mode=0)
	106 >>> #
	107 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+cpu'):
	108 >>> output_ = output.clone()
	109 >>> with timer:
	110 >>> self(output_, mode=1)
	111 >>> #
	112 >>> output = output.cuda()
	113 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+gpu'):
	114 >>> output_ = output.clone()
	115 >>> with timer:
	116 >>> self(output_, mode=0)
	117 >>> #
	118 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+gpu'):
	119 >>> output_ = output.clone()
	120 >>> with timer:
	121 >>> self(output_, mode=1)
	122 >>> #for timer in ubelt.Timerit(21, bestof=3, label='mode2+gpu'):
	123 >>> # output_ = output.clone()
	124 >>> # with timer:
	125 >>> # self(output_, mode=2)
	126
	127 %timeit self(output.data, mode=0)
	128 %timeit self(output.data, mode=1)
	129 %timeit self(output.data, mode=2)
	130 """
	131 21 65289.0 3109.0 1.7 boxes = self._get_boxes(network_output.data, mode=mode)
	132 21 3609713.0 171891.1 95.5 boxes = [self._nms(box, mode=mode) for box in boxes]
	133
	134 # force all boxes to be inside the image
	135 21 105953.0 5045.4 2.8 boxes = [self._clip_boxes(box) for box in boxes]
	136 21 51.0 2.4 0.0 postout = boxes
	137 21 46.0 2.2 0.0 return postout


	-1.00 seconds - None :None:None
	0.05 seconds - GetBoundingBoxes._get_boxes :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:202
	0.09 seconds - GetBoundingBoxes._clip_boxes:/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:139
	3.46 seconds - GetBoundingBoxes._nms :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:362
	3.78 seconds - GetBoundingBoxes.__call__ :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:44