Erotemic/gist:c94cfc71c6054329a254df43bb377eb8

## gistfile1.txt
Timer unit: 1e-06 s


Pystone time: 0.034663 s
File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
Function: _clip_boxes at line 139

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   139                                               @profiler.profile
   140                                               def _clip_boxes(self, box):
   141                                                   """
   142                                                   CommandLine:
   143                                                       python ~/code/netharn/netharn/models/yolo2/light_postproc.py GetBoundingBoxes._clip_boxes
   144
   145                                                   Example:
   146                                                       >>> import torch
   147                                                       >>> torch.random.manual_seed(0)
   148                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
   149                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
   150                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
   151                                                       >>> # Make random boxes for one item in a batch
   152                                                       >>> box = torch.randn(7, 6)
   153                                                       >>> box[:, 0].sigmoid_()
   154                                                       >>> box[:, 1].sigmoid_()
   155                                                       >>> box.abs_()
   156                                                       >>> new_box = self._clip_boxes(box)
   157                                                       >>> box_tlbr = util.Boxes(box.cpu().numpy()[:, 0:4], 'cxywh').as_tlbr().data
   158                                                       >>> new_tlbr = util.Boxes(new_box.cpu().numpy()[:, 0:4], 'cxywh').as_tlbr().data
   159                                                       >>> #
   160                                                       >>> print('old')
   161                                                       >>> print(box_tlbr)
   162                                                       >>> print('new')
   163                                                       >>> print(new_tlbr)
   164                                                       >>> #print('trim_w = {}'.format(ub.repr2(trim_w.numpy(), precision=4)))
   165                                                       >>> #print('trim_h = {}'.format(ub.repr2(trim_h.numpy(), precision=4)))
   166                                                       >>> assert np.all(new_tlbr.T[2] <= 1.01)
   167                                                       >>> assert np.all(new_tlbr.T[2] >= -0.01)
   168                                                       >>> assert np.all(new_tlbr.T[3] <= 1.01)
   169                                                       >>> assert np.all(new_tlbr.T[3] >= -0.01)
   170                                                   """
   171       336       1027.0      3.1      3.0          if len(box) == 0:
   172                                                       return box
   173
   174       336       4672.0     13.9     13.5          cx, cy, w, h = box.t()[0:4]
   175
   176       336       2505.0      7.5      7.2          x1 = cx - (w / 2)
   177       336       2056.0      6.1      5.9          x2 = cx + (w / 2)
   178
   179       336       1982.0      5.9      5.7          y1 = cy - (h / 2)
   180       336       1912.0      5.7      5.5          y2 = cy + (h / 2)
   181
   182       336       2658.0      7.9      7.7          trim_w1 = (0 - x1).clamp(0, None)
   183       336       1532.0      4.6      4.4          trim_w2 = (x2 - 1).clamp(0, None)
   184                                                   # multiply by 2 because we are trimming from both sides
   185       336       1951.0      5.8      5.6          trim_w = torch.max(trim_w1, trim_w2) * 2
   186
   187       336       2343.0      7.0      6.8          trim_h1 = (0 - y1).clamp(0, None)
   188       336       1502.0      4.5      4.3          trim_h2 = (y2 - 1).clamp(0, None)
   189       336       1578.0      4.7      4.6          trim_h = torch.max(trim_h1, trim_h2) * 2
   190
   191       336       1199.0      3.6      3.5          new_box = box.clone()
   192       336       4124.0     12.3     11.9          new_box[:, 2] = new_box[:, 2] - trim_w
   193       336       3218.0      9.6      9.3          new_box[:, 3] = new_box[:, 3] - trim_h
   194       336        404.0      1.2      1.2          return new_box


Pystone time: 4.16157 s
File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
Function: _get_boxes at line 202

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   202                                               @profiler.profile
   203                                               def _get_boxes(self, output, mode=1):
   204                                                   """
   205                                                   Returns array of detections for every image in batch
   206
   207                                                   Examples:
   208                                                       >>> import torch
   209                                                       >>> torch.random.manual_seed(0)
   210                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
   211                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
   212                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
   213                                                       >>> output = torch.randn(16, 125, 9, 9)
   214                                                       >>> from netharn import XPU
   215                                                       >>> output = XPU.cast('gpu').move(output)
   216                                                       >>> boxes = self._get_boxes(output.data)
   217                                                       >>> assert len(boxes) == 16
   218                                                       >>> assert all(len(b[0]) == 6 for b in boxes)
   219
   220                                                   Benchmark:
   221                                                       >>> from netharn.models.yolo2.light_postproc import *
   222                                                       >>> import torch
   223                                                       >>> torch.random.manual_seed(0)
   224                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
   225                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
   226                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
   227                                                       >>> output = torch.randn(16, 125, 9, 9)
   228                                                       >>> from netharn import XPU
   229                                                       >>> output = XPU.cast('gpu').move(output)
   230                                                       >>> for timer in ub.Timerit(100, bestof=10, label='mode 0'):
   231                                                       >>>     output_ = output.clone()
   232                                                       >>>     with timer:
   233                                                       >>>         boxes0 = self._get_boxes(output_.data, mode=0)
   234                                                       >>> for timer in ub.Timerit(100, bestof=10, label='mode 1'):
   235                                                       >>>     output_ = output.clone()
   236                                                       >>>     with timer:
   237                                                       >>>         boxes1 = self._get_boxes(output_.data, mode=1)
   238                                                       >>> for b0, b1 in zip(boxes0, boxes1):
   239                                                       >>>     assert np.all(b0.cpu() == b1.cpu())
   240
   241                                                       >>> from lightnet.data.postprocess import GetBoundingBoxes as GetBoundingBoxesOrig
   242                                                       >>> anchors_dict = dict(num=5, values=[1.3221,1.73145,3.19275,4.00944,5.05587,
   243                                                       >>>                               8.09892,9.47112,4.84053,11.2364,10.0071])
   244                                                       >>> post = GetBoundingBoxesOrig(anchors=anchors_dict, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
   245                                                       >>> for timer in ub.Timerit(100, bestof=10, label='original'):
   246                                                       >>>     output_ = output.clone()
   247                                                       >>>     with timer:
   248                                                       >>>         boxes3 = post._get_boxes(output_.data)
   249                                                       >>> # Check that the output is the same
   250                                                       >>> for b0, b3 in zip(boxes0, boxes3):
   251                                                       >>>     b3_ = torch.Tensor(b3)
   252                                                       >>>     assert np.all(b0.cpu() == b3_.cpu())
   253                                                   """
   254                                                   # dont modify inplace
   255        21        353.0     16.8      0.0          output = output.clone()
   256
   257                                                   # Check dimensions
   258        21        109.0      5.2      0.0          if output.dim() == 3:
   259                                                       output.unsqueeze_(0)
   260
   261                                                   # Variables
   262        21        105.0      5.0      0.0          cuda = output.is_cuda
   263        21        123.0      5.9      0.0          bsize = output.size(0)
   264        21         99.0      4.7      0.0          h = output.size(2)
   265        21         97.0      4.6      0.0          w = output.size(3)
   266
   267                                                   # Compute xc,yc, w,h, box_score on Tensor
   268        21        668.0     31.8      0.0          lin_x = torch.linspace(0, w - 1, w).repeat(h, 1).view(h * w)
   269        21        510.0     24.3      0.0          lin_y = torch.linspace(0, h - 1, h).repeat(w, 1).t().contiguous().view(h * w)
   270        21        589.0     28.0      0.0          anchor_w = torch.Tensor(self.anchors[::2]).view(1, self.num_anchors, 1)
   271        21        224.0     10.7      0.0          anchor_h = torch.Tensor(self.anchors[1::2]).view(
   272        21        165.0      7.9      0.0              1, self.num_anchors, 1)
   273        21         88.0      4.2      0.0          if cuda:
   274        21        683.0     32.5      0.0              lin_x = lin_x.cuda()
   275        21        351.0     16.7      0.0              lin_y = lin_y.cuda()
   276        21        321.0     15.3      0.0              anchor_w = anchor_w.cuda()
   277        21        304.0     14.5      0.0              anchor_h = anchor_h.cuda()
   278
   279                                                   # -1 == 5+num_classes (we can drop feature maps if 1 class)
   280        21        248.0     11.8      0.0          output_ = output.view(bsize, self.num_anchors, -1, h * w)
   281        21       1236.0     58.9      0.0          output_[:, :, 0, :].sigmoid_().add_(lin_x).div_(w)          # X center
   282        21        678.0     32.3      0.0          output_[:, :, 1, :].sigmoid_().add_(lin_y).div_(h)          # Y center
   283        21        821.0     39.1      0.0          output_[:, :, 2, :].exp_().mul_(anchor_w).div_(w)           # Width
   284        21        609.0     29.0      0.0          output_[:, :, 3, :].exp_().mul_(anchor_h).div_(h)           # Height
   285        21        317.0     15.1      0.0          output_[:, :, 4, :].sigmoid_()                              # Box score
   286
   287                                                   # Compute class_score
   288        21        103.0      4.9      0.0          if self.num_classes > 1:
   289        21        114.0      5.4      0.0              if torch.__version__.startswith('0.3'):
   290                                                           cls_scores = torch.nn.functional.softmax(
   291                                                               Variable(output_[:, :, 5:, :], volatile=True), 2).data
   292                                                       else:
   293        21        150.0      7.1      0.0                  cls_scores = torch.nn.functional.softmax(
   294        21       1204.0     57.3      0.0                      output_[:, :, 5:, :], 2)
   295        21        580.0     27.6      0.0              cls_max, cls_max_idx = torch.max(cls_scores, 2)
   296        21        381.0     18.1      0.0              cls_max.mul_(output_[:, :, 4, :])
   297                                                   else:
   298                                                       cls_max = output_[:, :, 4, :]
   299                                                       cls_max_idx = torch.zeros_like(cls_max)
   300
   301                                                   # Save detection if conf*class_conf is higher than threshold
   302
   303        21         98.0      4.7      0.0          if mode == 0:
   304        21       2910.0    138.6      0.1              output_ = output_.cpu()
   305        21        571.0     27.2      0.0              cls_max = cls_max.cpu()
   306        21        729.0     34.7      0.0              cls_max_idx = cls_max_idx.cpu()
   307        21         94.0      4.5      0.0              boxes = []
   308       357       1576.0      4.4      0.0              for b in range(bsize):
   309       336       1560.0      4.6      0.0                  box_batch = []
   310      2016       9407.0      4.7      0.2                  for a in range(self.num_anchors):
   311    137760     623744.0      4.5     15.0                      for i in range(h * w):
   312    136080    1446787.0     10.6     34.8                          if cls_max[b, a, i] > self.conf_thresh:
   313     33411     152048.0      4.6      3.7                              box_batch.append([
   314     33411     314918.0      9.4      7.6                                  output_[b, a, 0, i],
   315     33411     308103.0      9.2      7.4                                  output_[b, a, 1, i],
   316     33411     306491.0      9.2      7.4                                  output_[b, a, 2, i],
   317     33411     306215.0      9.2      7.4                                  output_[b, a, 3, i],
   318     33411     273042.0      8.2      6.6                                  cls_max[b, a, i],
   319     33411     298077.0      8.9      7.2                                  cls_max_idx[b, a, i]
   320                                                                       ])
   321       336     102186.0    304.1      2.5                  box_batch = torch.Tensor(box_batch)
   322       336       1691.0      5.0      0.0                  boxes.append(box_batch)
   323                                                   elif mode == 1 or mode == 2:
   324                                                       # Save detection if conf*class_conf is higher than threshold
   325                                                       flags = cls_max > self.conf_thresh
   326                                                       flat_flags = flags.view(-1)
   327
   328                                                       if not np.any(flat_flags):
   329                                                           return [torch.FloatTensor([]) for _ in range(bsize)]
   330
   331                                                       # number of potential detections per batch
   332                                                       item_size = np.prod(flags.shape[1:])
   333                                                       slices = [slice((item_size * i), (item_size * (i + 1)))
   334                                                                 for i in range(bsize)]
   335                                                       # number of detections per batch (prepended with a zero)
   336                                                       n_dets = torch.stack(
   337                                                           [flat_flags[0].long() * 0] + [flat_flags[sl].long().sum() for sl in slices])
   338                                                       # indices of splits between filtered detections
   339                                                       filtered_split_idxs = torch.cumsum(n_dets, dim=0)
   340
   341                                                       # Do actual filtering of detections by confidence thresh
   342                                                       flat_coords = output_.transpose(2, 3)[..., 0:4].clone().view(-1, 4)
   343                                                       flat_class_max = cls_max.view(-1)
   344                                                       flat_class_idx = cls_max_idx.view(-1)
   345
   346                                                       coords = flat_coords[flat_flags]
   347                                                       scores = flat_class_max[flat_flags]
   348                                                       cls_idxs = flat_class_idx[flat_flags]
   349
   350                                                       filtered_dets = torch.cat([coords, scores[:, None],
   351                                                                                  cls_idxs[:, None].float()], dim=1)
   352
   353                                                       boxes2 = []
   354                                                       for lx, rx in zip(filtered_split_idxs, filtered_split_idxs[1:]):
   355                                                           batch_box = filtered_dets[lx:rx]
   356                                                           boxes2.append(batch_box)
   357
   358                                                       boxes = boxes2
   359
   360        21         95.0      4.5      0.0          return boxes


Pystone time: 4.67617 s
File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
Function: _nms at line 362

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   362                                               @profiler.profile
   363                                               def _nms(self, boxes, mode=1):
   364                                                   """ Non maximum suppression.
   365                                                   Source: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
   366
   367                                                   Args:
   368                                                     boxes (tensor): Bounding boxes from get_detections
   369
   370                                                   Return:
   371                                                     (tensor): Pruned boxes
   372
   373                                                   CommandLine:
   374                                                       python -m netharn.models.yolo2.light_postproc GetBoundingBoxes._nms --profile
   375
   376                                                   Examples:
   377                                                       >>> import torch
   378                                                       >>> torch.random.manual_seed(0)
   379                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
   380                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
   381                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.01, nms_thresh=0.5)
   382                                                       >>> output = torch.randn(8, 125, 9, 9)
   383                                                       >>> boxes_ = self._get_boxes(output.data)
   384                                                       >>> from netharn import util
   385                                                       >>> boxes = torch.Tensor(boxes_[0])
   386                                                       >>> scores = boxes[..., 4:5]
   387                                                       >>> classes = boxes[..., 5:6]
   388                                                       >>> cxywh = util.Boxes(boxes[..., 0:4], 'cxywh')
   389                                                       >>> tlbr = cxywh.as_tlbr()
   390                                                       >>> util.non_max_supression(tlbr.data.numpy(), scores.numpy().ravel(), self.nms_thresh)
   391                                                       >>> self._nms(boxes, mode=0)
   392                                                       >>> self._nms(boxes, mode=1)
   393
   394                                                   Benchmark:
   395                                                       boxes = torch.Tensor(boxes_[0])
   396                                                       import ubelt
   397                                                       for timer in ubelt.Timerit(100, bestof=10, label='nms0+cpu'):
   398                                                           with timer:
   399                                                               self._nms(boxes, mode=0)
   400
   401                                                       for timer in ubelt.Timerit(100, bestof=10, label='nms1+cpu'):
   402                                                           with timer:
   403                                                               self._nms(boxes, mode=1)
   404
   405                                                       boxes = boxes.cuda()
   406                                                       import ubelt
   407                                                       for timer in ubelt.Timerit(100, bestof=10, label='nms0+gpu'):
   408                                                           with timer:
   409                                                               self._nms(boxes, mode=0)
   410
   411                                                       for timer in ubelt.Timerit(100, bestof=10, label='nms1+gpu'):
   412                                                           with timer:
   413                                                               self._nms(boxes, mode=1)
   414                                                   """
   415       336       1387.0      4.1      0.0          if boxes.numel() == 0:
   416                                                       return boxes
   417
   418       336       2338.0      7.0      0.0          a = boxes[:, :2]
   419       336       1519.0      4.5      0.0          b = boxes[:, 2:4]
   420       336      15477.0     46.1      0.3          bboxes = torch.cat([a - b / 2, a + b / 2], 1)
   421       336       1945.0      5.8      0.0          scores = boxes[:, 4]
   422
   423       336        953.0      2.8      0.0          if mode == 1:
   424                                                       bboxes = bboxes.cpu().numpy().astype(np.float32)
   425                                                       scores = scores.cpu().numpy().astype(np.float32)
   426                                                       classes = boxes[..., 5].cpu().numpy().astype(np.int)
   427                                                       keep = []
   428                                                       for idxs in ub.group_items(range(len(classes)), classes).values():
   429                                                           cls_boxes = bboxes.take(idxs, axis=0)
   430                                                           cls_scores = scores.take(idxs, axis=0)
   431                                                           cls_keep = util.non_max_supression(cls_boxes, cls_scores,
   432                                                                                              self.nms_thresh)
   433                                                           keep.extend(list(ub.take(idxs, cls_keep)))
   434                                                       keep = sorted(keep)
   435                                                       return boxes[torch.LongTensor(keep)]
   436       336        949.0      2.8      0.0          elif mode == 0 or mode == 2:
   437                                                       # if torch.cuda.is_available:
   438                                                       #     boxes = boxes.cuda()
   439
   440       336       1409.0      4.2      0.0              x1 = bboxes[:, 0]
   441       336       1412.0      4.2      0.0              y1 = bboxes[:, 1]
   442       336       1403.0      4.2      0.0              x2 = bboxes[:, 2]
   443       336       1431.0      4.3      0.0              y2 = bboxes[:, 3]
   444
   445       336       4133.0     12.3      0.1              areas = ((x2 - x1) * (y2 - y1))
   446       336       4389.0     13.1      0.1              _, order = scores.sort(0, descending=True)
   447
   448       336        974.0      2.9      0.0              keep = []
   449     28686      95156.0      3.3      2.0              while order.numel() > 0:
   450     28686      86603.0      3.0      1.9                  if order.numel() == 1:
   451       336       1582.0      4.7      0.0                      if torch.__version__.startswith('0.3'):
   452                                                                   i = order[0]
   453                                                               else:
   454       336       1362.0      4.1      0.0                          i = order.item()
   455       336       1007.0      3.0      0.0                      i = order.item()
   456       336       1024.0      3.0      0.0                      keep.append(i)
   457       336        971.0      2.9      0.0                      break
   458
   459     28350     131201.0      4.6      2.8                  i = order[0]
   460     28350      86250.0      3.0      1.8                  keep.append(i)
   461
   462     28350     557488.0     19.7     11.9                  xx1 = x1[order[1:]].clamp(min=x1[i])
   463     28350     523500.0     18.5     11.2                  yy1 = y1[order[1:]].clamp(min=y1[i])
   464     28350     523108.0     18.5     11.2                  xx2 = x2[order[1:]].clamp(max=x2[i])
   465     28350     513569.0     18.1     11.0                  yy2 = y2[order[1:]].clamp(max=y2[i])
   466
   467     28350     225796.0      8.0      4.8                  w = (xx2 - xx1).clamp(min=0)
   468     28350     197729.0      7.0      4.2                  h = (yy2 - yy1).clamp(min=0)
   469     28350     162712.0      5.7      3.5                  inter = w * h
   470
   471     28350     768737.0     27.1     16.4                  iou = inter / (areas[i] + areas[order[1:]] - inter)
   472
   473     28350     297716.0     10.5      6.4                  ids = (iou <= self.nms_thresh).nonzero().squeeze()
   474     28350      97075.0      3.4      2.1                  if ids.numel() == 0:
   475                                                               break
   476     28350     345191.0     12.2      7.4                  order = order[ids + 1]
   477       336      18678.0     55.6      0.4              return boxes[torch.LongTensor(keep)]


Pystone time: 12.6522 s
File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
Function: __call__ at line 44

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    44                                               @profiler.profile
    45                                               def __call__(self, network_output, mode=1):
    46                                                   """ Compute bounding boxes after thresholding and nms
    47
    48                                                       network_output (torch.autograd.Variable): Output tensor from the lightnet network
    49
    50                                                   # Examples:
    51                                                   #     >>> import torch
    52                                                   #     >>> torch.random.manual_seed(0)
    53                                                   #     >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
    54                                                   #     >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
    55                                                   #     >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
    56                                                   #     >>> output = torch.randn(8, 125, 9, 9)
    57                                                   #     >>> boxes = self(output)
    58                                                   #     >>> assert len(boxes) == 8
    59                                                   #     >>> assert all(b.shape[1] == 6 for b in boxes)
    60
    61                                                   CommandLine:
    62                                                       python -m netharn.models.yolo2.light_postproc GetBoundingBoxes.__call__:1 --profile
    63                                                       python -m netharn.models.yolo2.light_postproc GetBoundingBoxes.__call__:2 --profile
    64
    65                                                   Example:
    66                                                       >>> import torch
    67                                                       >>> torch.random.manual_seed(0)
    68                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
    69                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
    70                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
    71                                                       >>> import ubelt
    72                                                       >>> output = torch.randn(16, 125, 9, 9)
    73                                                       >>> output = output.cuda()
    74                                                       >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+gpu'):
    75                                                       >>>     output_ = output.clone()
    76                                                       >>>     with timer:
    77                                                       >>>         self(output_, mode=0)
    78
    79                                                   Example:
    80                                                       >>> import torch
    81                                                       >>> torch.random.manual_seed(0)
    82                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
    83                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
    84                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
    85                                                       >>> import ubelt
    86                                                       >>> output = torch.randn(16, 125, 9, 9)
    87                                                       >>> output = output.cuda()
    88                                                       >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+gpu'):
    89                                                       >>>     output_ = output.clone()
    90                                                       >>>     with timer:
    91                                                       >>>         self(output_, mode=1)
    92
    93                                                   Benchmark:
    94                                                       >>> import torch
    95                                                       >>> torch.random.manual_seed(0)
    96                                                       >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
    97                                                       >>>                     8.09892,9.47112,4.84053,11.2364,10.0071])
    98                                                       >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
    99                                                       >>> import ubelt
   100                                                       >>> output = torch.randn(16, 125, 9, 9)
   101                                                       >>> #
   102                                                       >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+cpu'):
   103                                                       >>>     output_ = output.clone()
   104                                                       >>>     with timer:
   105                                                       >>>         self(output_, mode=0)
   106                                                       >>> #
   107                                                       >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+cpu'):
   108                                                       >>>     output_ = output.clone()
   109                                                       >>>     with timer:
   110                                                       >>>         self(output_, mode=1)
   111                                                       >>> #
   112                                                       >>> output = output.cuda()
   113                                                       >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+gpu'):
   114                                                       >>>     output_ = output.clone()
   115                                                       >>>     with timer:
   116                                                       >>>         self(output_, mode=0)
   117                                                       >>> #
   118                                                       >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+gpu'):
   119                                                       >>>     output_ = output.clone()
   120                                                       >>>     with timer:
   121                                                       >>>         self(output_, mode=1)
   122                                                       >>> #for timer in ubelt.Timerit(21, bestof=3, label='mode2+gpu'):
   123                                                       >>> #    output_ = output.clone()
   124                                                       >>> #    with timer:
   125                                                       >>> #        self(output_, mode=2)
   126
   127                                                       %timeit self(output.data, mode=0)
   128                                                       %timeit self(output.data, mode=1)
   129                                                       %timeit self(output.data, mode=2)
   130                                                   """
   131        21    6604884.0 314518.3     52.2          boxes = self._get_boxes(network_output.data, mode=mode)
   132        21    6002820.0 285848.6     47.4          boxes = [self._nms(box, mode=mode) for box in boxes]
   133
   134                                                   # force all boxes to be inside the image
   135        21      44388.0   2113.7      0.4          boxes = [self._clip_boxes(box) for box in boxes]
   136        21         48.0      2.3      0.0          postout = boxes
   137        21         43.0      2.0      0.0          return postout


 -1.00 seconds - None                        :None:None
  0.03 seconds - GetBoundingBoxes._clip_boxes:/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:139
  4.16 seconds - GetBoundingBoxes._get_boxes :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:202
  4.68 seconds - GetBoundingBoxes._nms       :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:362
 12.65 seconds - GetBoundingBoxes.__call__   :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:44
	Timer unit: 1e-06 s


	Pystone time: 0.034663 s
	File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
	Function: _clip_boxes at line 139

	Line # Hits Time Per Hit % Time Line Contents
	==============================================================
	139 @profiler.profile
	140 def _clip_boxes(self, box):
	141 """
	142 CommandLine:
	143 python ~/code/netharn/netharn/models/yolo2/light_postproc.py GetBoundingBoxes._clip_boxes
	144
	145 Example:
	146 >>> import torch
	147 >>> torch.random.manual_seed(0)
	148 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	149 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	150 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	151 >>> # Make random boxes for one item in a batch
	152 >>> box = torch.randn(7, 6)
	153 >>> box[:, 0].sigmoid_()
	154 >>> box[:, 1].sigmoid_()
	155 >>> box.abs_()
	156 >>> new_box = self._clip_boxes(box)
	157 >>> box_tlbr = util.Boxes(box.cpu().numpy()[:, 0:4], 'cxywh').as_tlbr().data
	158 >>> new_tlbr = util.Boxes(new_box.cpu().numpy()[:, 0:4], 'cxywh').as_tlbr().data
	159 >>> #
	160 >>> print('old')
	161 >>> print(box_tlbr)
	162 >>> print('new')
	163 >>> print(new_tlbr)
	164 >>> #print('trim_w = {}'.format(ub.repr2(trim_w.numpy(), precision=4)))
	165 >>> #print('trim_h = {}'.format(ub.repr2(trim_h.numpy(), precision=4)))
	166 >>> assert np.all(new_tlbr.T[2] <= 1.01)
	167 >>> assert np.all(new_tlbr.T[2] >= -0.01)
	168 >>> assert np.all(new_tlbr.T[3] <= 1.01)
	169 >>> assert np.all(new_tlbr.T[3] >= -0.01)
	170 """
	171 336 1027.0 3.1 3.0 if len(box) == 0:
	172 return box
	173
	174 336 4672.0 13.9 13.5 cx, cy, w, h = box.t()[0:4]
	175
	176 336 2505.0 7.5 7.2 x1 = cx - (w / 2)
	177 336 2056.0 6.1 5.9 x2 = cx + (w / 2)
	178
	179 336 1982.0 5.9 5.7 y1 = cy - (h / 2)
	180 336 1912.0 5.7 5.5 y2 = cy + (h / 2)
	181
	182 336 2658.0 7.9 7.7 trim_w1 = (0 - x1).clamp(0, None)
	183 336 1532.0 4.6 4.4 trim_w2 = (x2 - 1).clamp(0, None)
	184 # multiply by 2 because we are trimming from both sides
	185 336 1951.0 5.8 5.6 trim_w = torch.max(trim_w1, trim_w2) * 2
	186
	187 336 2343.0 7.0 6.8 trim_h1 = (0 - y1).clamp(0, None)
	188 336 1502.0 4.5 4.3 trim_h2 = (y2 - 1).clamp(0, None)
	189 336 1578.0 4.7 4.6 trim_h = torch.max(trim_h1, trim_h2) * 2
	190
	191 336 1199.0 3.6 3.5 new_box = box.clone()
	192 336 4124.0 12.3 11.9 new_box[:, 2] = new_box[:, 2] - trim_w
	193 336 3218.0 9.6 9.3 new_box[:, 3] = new_box[:, 3] - trim_h
	194 336 404.0 1.2 1.2 return new_box


	Pystone time: 4.16157 s
	File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
	Function: _get_boxes at line 202

	Line # Hits Time Per Hit % Time Line Contents
	==============================================================
	202 @profiler.profile
	203 def _get_boxes(self, output, mode=1):
	204 """
	205 Returns array of detections for every image in batch
	206
	207 Examples:
	208 >>> import torch
	209 >>> torch.random.manual_seed(0)
	210 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	211 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	212 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	213 >>> output = torch.randn(16, 125, 9, 9)
	214 >>> from netharn import XPU
	215 >>> output = XPU.cast('gpu').move(output)
	216 >>> boxes = self._get_boxes(output.data)
	217 >>> assert len(boxes) == 16
	218 >>> assert all(len(b[0]) == 6 for b in boxes)
	219
	220 Benchmark:
	221 >>> from netharn.models.yolo2.light_postproc import *
	222 >>> import torch
	223 >>> torch.random.manual_seed(0)
	224 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	225 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	226 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	227 >>> output = torch.randn(16, 125, 9, 9)
	228 >>> from netharn import XPU
	229 >>> output = XPU.cast('gpu').move(output)
	230 >>> for timer in ub.Timerit(100, bestof=10, label='mode 0'):
	231 >>> output_ = output.clone()
	232 >>> with timer:
	233 >>> boxes0 = self._get_boxes(output_.data, mode=0)
	234 >>> for timer in ub.Timerit(100, bestof=10, label='mode 1'):
	235 >>> output_ = output.clone()
	236 >>> with timer:
	237 >>> boxes1 = self._get_boxes(output_.data, mode=1)
	238 >>> for b0, b1 in zip(boxes0, boxes1):
	239 >>> assert np.all(b0.cpu() == b1.cpu())
	240
	241 >>> from lightnet.data.postprocess import GetBoundingBoxes as GetBoundingBoxesOrig
	242 >>> anchors_dict = dict(num=5, values=[1.3221,1.73145,3.19275,4.00944,5.05587,
	243 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	244 >>> post = GetBoundingBoxesOrig(anchors=anchors_dict, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	245 >>> for timer in ub.Timerit(100, bestof=10, label='original'):
	246 >>> output_ = output.clone()
	247 >>> with timer:
	248 >>> boxes3 = post._get_boxes(output_.data)
	249 >>> # Check that the output is the same
	250 >>> for b0, b3 in zip(boxes0, boxes3):
	251 >>> b3_ = torch.Tensor(b3)
	252 >>> assert np.all(b0.cpu() == b3_.cpu())
	253 """
	254 # dont modify inplace
	255 21 353.0 16.8 0.0 output = output.clone()
	256
	257 # Check dimensions
	258 21 109.0 5.2 0.0 if output.dim() == 3:
	259 output.unsqueeze_(0)
	260
	261 # Variables
	262 21 105.0 5.0 0.0 cuda = output.is_cuda
	263 21 123.0 5.9 0.0 bsize = output.size(0)
	264 21 99.0 4.7 0.0 h = output.size(2)
	265 21 97.0 4.6 0.0 w = output.size(3)
	266
	267 # Compute xc,yc, w,h, box_score on Tensor
	268 21 668.0 31.8 0.0 lin_x = torch.linspace(0, w - 1, w).repeat(h, 1).view(h * w)
	269 21 510.0 24.3 0.0 lin_y = torch.linspace(0, h - 1, h).repeat(w, 1).t().contiguous().view(h * w)
	270 21 589.0 28.0 0.0 anchor_w = torch.Tensor(self.anchors[::2]).view(1, self.num_anchors, 1)
	271 21 224.0 10.7 0.0 anchor_h = torch.Tensor(self.anchors[1::2]).view(
	272 21 165.0 7.9 0.0 1, self.num_anchors, 1)
	273 21 88.0 4.2 0.0 if cuda:
	274 21 683.0 32.5 0.0 lin_x = lin_x.cuda()
	275 21 351.0 16.7 0.0 lin_y = lin_y.cuda()
	276 21 321.0 15.3 0.0 anchor_w = anchor_w.cuda()
	277 21 304.0 14.5 0.0 anchor_h = anchor_h.cuda()
	278
	279 # -1 == 5+num_classes (we can drop feature maps if 1 class)
	280 21 248.0 11.8 0.0 output_ = output.view(bsize, self.num_anchors, -1, h * w)
	281 21 1236.0 58.9 0.0 output_[:, :, 0, :].sigmoid_().add_(lin_x).div_(w) # X center
	282 21 678.0 32.3 0.0 output_[:, :, 1, :].sigmoid_().add_(lin_y).div_(h) # Y center
	283 21 821.0 39.1 0.0 output_[:, :, 2, :].exp_().mul_(anchor_w).div_(w) # Width
	284 21 609.0 29.0 0.0 output_[:, :, 3, :].exp_().mul_(anchor_h).div_(h) # Height
	285 21 317.0 15.1 0.0 output_[:, :, 4, :].sigmoid_() # Box score
	286
	287 # Compute class_score
	288 21 103.0 4.9 0.0 if self.num_classes > 1:
	289 21 114.0 5.4 0.0 if torch.__version__.startswith('0.3'):
	290 cls_scores = torch.nn.functional.softmax(
	291 Variable(output_[:, :, 5:, :], volatile=True), 2).data
	292 else:
	293 21 150.0 7.1 0.0 cls_scores = torch.nn.functional.softmax(
	294 21 1204.0 57.3 0.0 output_[:, :, 5:, :], 2)
	295 21 580.0 27.6 0.0 cls_max, cls_max_idx = torch.max(cls_scores, 2)
	296 21 381.0 18.1 0.0 cls_max.mul_(output_[:, :, 4, :])
	297 else:
	298 cls_max = output_[:, :, 4, :]
	299 cls_max_idx = torch.zeros_like(cls_max)
	300
	301 # Save detection if conf*class_conf is higher than threshold
	302
	303 21 98.0 4.7 0.0 if mode == 0:
	304 21 2910.0 138.6 0.1 output_ = output_.cpu()
	305 21 571.0 27.2 0.0 cls_max = cls_max.cpu()
	306 21 729.0 34.7 0.0 cls_max_idx = cls_max_idx.cpu()
	307 21 94.0 4.5 0.0 boxes = []
	308 357 1576.0 4.4 0.0 for b in range(bsize):
	309 336 1560.0 4.6 0.0 box_batch = []
	310 2016 9407.0 4.7 0.2 for a in range(self.num_anchors):
	311 137760 623744.0 4.5 15.0 for i in range(h * w):
	312 136080 1446787.0 10.6 34.8 if cls_max[b, a, i] > self.conf_thresh:
	313 33411 152048.0 4.6 3.7 box_batch.append([
	314 33411 314918.0 9.4 7.6 output_[b, a, 0, i],
	315 33411 308103.0 9.2 7.4 output_[b, a, 1, i],
	316 33411 306491.0 9.2 7.4 output_[b, a, 2, i],
	317 33411 306215.0 9.2 7.4 output_[b, a, 3, i],
	318 33411 273042.0 8.2 6.6 cls_max[b, a, i],
	319 33411 298077.0 8.9 7.2 cls_max_idx[b, a, i]
	320 ])
	321 336 102186.0 304.1 2.5 box_batch = torch.Tensor(box_batch)
	322 336 1691.0 5.0 0.0 boxes.append(box_batch)
	323 elif mode == 1 or mode == 2:
	324 # Save detection if conf*class_conf is higher than threshold
	325 flags = cls_max > self.conf_thresh
	326 flat_flags = flags.view(-1)
	327
	328 if not np.any(flat_flags):
	329 return [torch.FloatTensor([]) for _ in range(bsize)]
	330
	331 # number of potential detections per batch
	332 item_size = np.prod(flags.shape[1:])
	333 slices = [slice((item_size * i), (item_size * (i + 1)))
	334 for i in range(bsize)]
	335 # number of detections per batch (prepended with a zero)
	336 n_dets = torch.stack(
	337 [flat_flags[0].long() * 0] + [flat_flags[sl].long().sum() for sl in slices])
	338 # indices of splits between filtered detections
	339 filtered_split_idxs = torch.cumsum(n_dets, dim=0)
	340
	341 # Do actual filtering of detections by confidence thresh
	342 flat_coords = output_.transpose(2, 3)[..., 0:4].clone().view(-1, 4)
	343 flat_class_max = cls_max.view(-1)
	344 flat_class_idx = cls_max_idx.view(-1)
	345
	346 coords = flat_coords[flat_flags]
	347 scores = flat_class_max[flat_flags]
	348 cls_idxs = flat_class_idx[flat_flags]
	349
	350 filtered_dets = torch.cat([coords, scores[:, None],
	351 cls_idxs[:, None].float()], dim=1)
	352
	353 boxes2 = []
	354 for lx, rx in zip(filtered_split_idxs, filtered_split_idxs[1:]):
	355 batch_box = filtered_dets[lx:rx]
	356 boxes2.append(batch_box)
	357
	358 boxes = boxes2
	359
	360 21 95.0 4.5 0.0 return boxes


	Pystone time: 4.67617 s
	File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
	Function: _nms at line 362

	Line # Hits Time Per Hit % Time Line Contents
	==============================================================
	362 @profiler.profile
	363 def _nms(self, boxes, mode=1):
	364 """ Non maximum suppression.
	365 Source: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
	366
	367 Args:
	368 boxes (tensor): Bounding boxes from get_detections
	369
	370 Return:
	371 (tensor): Pruned boxes
	372
	373 CommandLine:
	374 python -m netharn.models.yolo2.light_postproc GetBoundingBoxes._nms --profile
	375
	376 Examples:
	377 >>> import torch
	378 >>> torch.random.manual_seed(0)
	379 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	380 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	381 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.01, nms_thresh=0.5)
	382 >>> output = torch.randn(8, 125, 9, 9)
	383 >>> boxes_ = self._get_boxes(output.data)
	384 >>> from netharn import util
	385 >>> boxes = torch.Tensor(boxes_[0])
	386 >>> scores = boxes[..., 4:5]
	387 >>> classes = boxes[..., 5:6]
	388 >>> cxywh = util.Boxes(boxes[..., 0:4], 'cxywh')
	389 >>> tlbr = cxywh.as_tlbr()
	390 >>> util.non_max_supression(tlbr.data.numpy(), scores.numpy().ravel(), self.nms_thresh)
	391 >>> self._nms(boxes, mode=0)
	392 >>> self._nms(boxes, mode=1)
	393
	394 Benchmark:
	395 boxes = torch.Tensor(boxes_[0])
	396 import ubelt
	397 for timer in ubelt.Timerit(100, bestof=10, label='nms0+cpu'):
	398 with timer:
	399 self._nms(boxes, mode=0)
	400
	401 for timer in ubelt.Timerit(100, bestof=10, label='nms1+cpu'):
	402 with timer:
	403 self._nms(boxes, mode=1)
	404
	405 boxes = boxes.cuda()
	406 import ubelt
	407 for timer in ubelt.Timerit(100, bestof=10, label='nms0+gpu'):
	408 with timer:
	409 self._nms(boxes, mode=0)
	410
	411 for timer in ubelt.Timerit(100, bestof=10, label='nms1+gpu'):
	412 with timer:
	413 self._nms(boxes, mode=1)
	414 """
	415 336 1387.0 4.1 0.0 if boxes.numel() == 0:
	416 return boxes
	417
	418 336 2338.0 7.0 0.0 a = boxes[:, :2]
	419 336 1519.0 4.5 0.0 b = boxes[:, 2:4]
	420 336 15477.0 46.1 0.3 bboxes = torch.cat([a - b / 2, a + b / 2], 1)
	421 336 1945.0 5.8 0.0 scores = boxes[:, 4]
	422
	423 336 953.0 2.8 0.0 if mode == 1:
	424 bboxes = bboxes.cpu().numpy().astype(np.float32)
	425 scores = scores.cpu().numpy().astype(np.float32)
	426 classes = boxes[..., 5].cpu().numpy().astype(np.int)
	427 keep = []
	428 for idxs in ub.group_items(range(len(classes)), classes).values():
	429 cls_boxes = bboxes.take(idxs, axis=0)
	430 cls_scores = scores.take(idxs, axis=0)
	431 cls_keep = util.non_max_supression(cls_boxes, cls_scores,
	432 self.nms_thresh)
	433 keep.extend(list(ub.take(idxs, cls_keep)))
	434 keep = sorted(keep)
	435 return boxes[torch.LongTensor(keep)]
	436 336 949.0 2.8 0.0 elif mode == 0 or mode == 2:
	437 # if torch.cuda.is_available:
	438 # boxes = boxes.cuda()
	439
	440 336 1409.0 4.2 0.0 x1 = bboxes[:, 0]
	441 336 1412.0 4.2 0.0 y1 = bboxes[:, 1]
	442 336 1403.0 4.2 0.0 x2 = bboxes[:, 2]
	443 336 1431.0 4.3 0.0 y2 = bboxes[:, 3]
	444
	445 336 4133.0 12.3 0.1 areas = ((x2 - x1) * (y2 - y1))
	446 336 4389.0 13.1 0.1 _, order = scores.sort(0, descending=True)
	447
	448 336 974.0 2.9 0.0 keep = []
	449 28686 95156.0 3.3 2.0 while order.numel() > 0:
	450 28686 86603.0 3.0 1.9 if order.numel() == 1:
	451 336 1582.0 4.7 0.0 if torch.__version__.startswith('0.3'):
	452 i = order[0]
	453 else:
	454 336 1362.0 4.1 0.0 i = order.item()
	455 336 1007.0 3.0 0.0 i = order.item()
	456 336 1024.0 3.0 0.0 keep.append(i)
	457 336 971.0 2.9 0.0 break
	458
	459 28350 131201.0 4.6 2.8 i = order[0]
	460 28350 86250.0 3.0 1.8 keep.append(i)
	461
	462 28350 557488.0 19.7 11.9 xx1 = x1[order[1:]].clamp(min=x1[i])
	463 28350 523500.0 18.5 11.2 yy1 = y1[order[1:]].clamp(min=y1[i])
	464 28350 523108.0 18.5 11.2 xx2 = x2[order[1:]].clamp(max=x2[i])
	465 28350 513569.0 18.1 11.0 yy2 = y2[order[1:]].clamp(max=y2[i])
	466
	467 28350 225796.0 8.0 4.8 w = (xx2 - xx1).clamp(min=0)
	468 28350 197729.0 7.0 4.2 h = (yy2 - yy1).clamp(min=0)
	469 28350 162712.0 5.7 3.5 inter = w * h
	470
	471 28350 768737.0 27.1 16.4 iou = inter / (areas[i] + areas[order[1:]] - inter)
	472
	473 28350 297716.0 10.5 6.4 ids = (iou <= self.nms_thresh).nonzero().squeeze()
	474 28350 97075.0 3.4 2.1 if ids.numel() == 0:
	475 break
	476 28350 345191.0 12.2 7.4 order = order[ids + 1]
	477 336 18678.0 55.6 0.4 return boxes[torch.LongTensor(keep)]


	Pystone time: 12.6522 s
	File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
	Function: __call__ at line 44

	Line # Hits Time Per Hit % Time Line Contents
	==============================================================
	44 @profiler.profile
	45 def __call__(self, network_output, mode=1):
	46 """ Compute bounding boxes after thresholding and nms
	47
	48 network_output (torch.autograd.Variable): Output tensor from the lightnet network
	49
	50 # Examples:
	51 # >>> import torch
	52 # >>> torch.random.manual_seed(0)
	53 # >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	54 # >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	55 # >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	56 # >>> output = torch.randn(8, 125, 9, 9)
	57 # >>> boxes = self(output)
	58 # >>> assert len(boxes) == 8
	59 # >>> assert all(b.shape[1] == 6 for b in boxes)
	60
	61 CommandLine:
	62 python -m netharn.models.yolo2.light_postproc GetBoundingBoxes.__call__:1 --profile
	63 python -m netharn.models.yolo2.light_postproc GetBoundingBoxes.__call__:2 --profile
	64
	65 Example:
	66 >>> import torch
	67 >>> torch.random.manual_seed(0)
	68 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	69 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	70 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	71 >>> import ubelt
	72 >>> output = torch.randn(16, 125, 9, 9)
	73 >>> output = output.cuda()
	74 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+gpu'):
	75 >>> output_ = output.clone()
	76 >>> with timer:
	77 >>> self(output_, mode=0)
	78
	79 Example:
	80 >>> import torch
	81 >>> torch.random.manual_seed(0)
	82 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	83 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	84 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	85 >>> import ubelt
	86 >>> output = torch.randn(16, 125, 9, 9)
	87 >>> output = output.cuda()
	88 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+gpu'):
	89 >>> output_ = output.clone()
	90 >>> with timer:
	91 >>> self(output_, mode=1)
	92
	93 Benchmark:
	94 >>> import torch
	95 >>> torch.random.manual_seed(0)
	96 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
	97 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
	98 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
	99 >>> import ubelt
	100 >>> output = torch.randn(16, 125, 9, 9)
	101 >>> #
	102 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+cpu'):
	103 >>> output_ = output.clone()
	104 >>> with timer:
	105 >>> self(output_, mode=0)
	106 >>> #
	107 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+cpu'):
	108 >>> output_ = output.clone()
	109 >>> with timer:
	110 >>> self(output_, mode=1)
	111 >>> #
	112 >>> output = output.cuda()
	113 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+gpu'):
	114 >>> output_ = output.clone()
	115 >>> with timer:
	116 >>> self(output_, mode=0)
	117 >>> #
	118 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+gpu'):
	119 >>> output_ = output.clone()
	120 >>> with timer:
	121 >>> self(output_, mode=1)
	122 >>> #for timer in ubelt.Timerit(21, bestof=3, label='mode2+gpu'):
	123 >>> # output_ = output.clone()
	124 >>> # with timer:
	125 >>> # self(output_, mode=2)
	126
	127 %timeit self(output.data, mode=0)
	128 %timeit self(output.data, mode=1)
	129 %timeit self(output.data, mode=2)
	130 """
	131 21 6604884.0 314518.3 52.2 boxes = self._get_boxes(network_output.data, mode=mode)
	132 21 6002820.0 285848.6 47.4 boxes = [self._nms(box, mode=mode) for box in boxes]
	133
	134 # force all boxes to be inside the image
	135 21 44388.0 2113.7 0.4 boxes = [self._clip_boxes(box) for box in boxes]
	136 21 48.0 2.3 0.0 postout = boxes
	137 21 43.0 2.0 0.0 return postout


	-1.00 seconds - None :None:None
	0.03 seconds - GetBoundingBoxes._clip_boxes:/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:139
	4.16 seconds - GetBoundingBoxes._get_boxes :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:202
	4.68 seconds - GetBoundingBoxes._nms :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:362
	12.65 seconds - GetBoundingBoxes.__call__ :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:44