Skip to content

Instantly share code, notes, and snippets.

@Erotemic
Created April 4, 2018 19:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Erotemic/e6be4658bad9fa28a56bf4e7c516a820 to your computer and use it in GitHub Desktop.
Save Erotemic/e6be4658bad9fa28a56bf4e7c516a820 to your computer and use it in GitHub Desktop.
Mode 1 Times
Timer unit: 1e-06 s
Pystone time: 0.05441 s
File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
Function: _get_boxes at line 202
Line # Hits Time Per Hit % Time Line Contents
==============================================================
202 @profiler.profile
203 def _get_boxes(self, output, mode=1):
204 """
205 Returns array of detections for every image in batch
206
207 Examples:
208 >>> import torch
209 >>> torch.random.manual_seed(0)
210 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
211 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
212 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
213 >>> output = torch.randn(16, 125, 9, 9)
214 >>> from netharn import XPU
215 >>> output = XPU.cast('gpu').move(output)
216 >>> boxes = self._get_boxes(output.data)
217 >>> assert len(boxes) == 16
218 >>> assert all(len(b[0]) == 6 for b in boxes)
219
220 Benchmark:
221 >>> from netharn.models.yolo2.light_postproc import *
222 >>> import torch
223 >>> torch.random.manual_seed(0)
224 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
225 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
226 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
227 >>> output = torch.randn(16, 125, 9, 9)
228 >>> from netharn import XPU
229 >>> output = XPU.cast('gpu').move(output)
230 >>> for timer in ub.Timerit(100, bestof=10, label='mode 0'):
231 >>> output_ = output.clone()
232 >>> with timer:
233 >>> boxes0 = self._get_boxes(output_.data, mode=0)
234 >>> for timer in ub.Timerit(100, bestof=10, label='mode 1'):
235 >>> output_ = output.clone()
236 >>> with timer:
237 >>> boxes1 = self._get_boxes(output_.data, mode=1)
238 >>> for b0, b1 in zip(boxes0, boxes1):
239 >>> assert np.all(b0.cpu() == b1.cpu())
240
241 >>> from lightnet.data.postprocess import GetBoundingBoxes as GetBoundingBoxesOrig
242 >>> anchors_dict = dict(num=5, values=[1.3221,1.73145,3.19275,4.00944,5.05587,
243 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
244 >>> post = GetBoundingBoxesOrig(anchors=anchors_dict, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
245 >>> for timer in ub.Timerit(100, bestof=10, label='original'):
246 >>> output_ = output.clone()
247 >>> with timer:
248 >>> boxes3 = post._get_boxes(output_.data)
249 >>> # Check that the output is the same
250 >>> for b0, b3 in zip(boxes0, boxes3):
251 >>> b3_ = torch.Tensor(b3)
252 >>> assert np.all(b0.cpu() == b3_.cpu())
253 """
254 # don't modify in place
255 21 339.0 16.1 0.6 output = output.clone()
256
257 # Check dimensions
258 21 108.0 5.1 0.2 if output.dim() == 3:
259 output.unsqueeze_(0)
260
261 # Variables
262 21 118.0 5.6 0.2 cuda = output.is_cuda
263 21 151.0 7.2 0.3 bsize = output.size(0)
264 21 96.0 4.6 0.2 h = output.size(2)
265 21 92.0 4.4 0.2 w = output.size(3)
266
267 # Compute xc,yc, w,h, box_score on Tensor
268 21 833.0 39.7 1.5 lin_x = torch.linspace(0, w - 1, w).repeat(h, 1).view(h * w)
269 21 598.0 28.5 1.1 lin_y = torch.linspace(0, h - 1, h).repeat(w, 1).t().contiguous().view(h * w)
270 21 575.0 27.4 1.1 anchor_w = torch.Tensor(self.anchors[::2]).view(1, self.num_anchors, 1)
271 21 203.0 9.7 0.4 anchor_h = torch.Tensor(self.anchors[1::2]).view(
272 21 184.0 8.8 0.3 1, self.num_anchors, 1)
273 21 109.0 5.2 0.2 if cuda:
274 21 656.0 31.2 1.2 lin_x = lin_x.cuda()
275 21 338.0 16.1 0.6 lin_y = lin_y.cuda()
276 21 314.0 15.0 0.6 anchor_w = anchor_w.cuda()
277 21 304.0 14.5 0.6 anchor_h = anchor_h.cuda()
278
279 # -1 == 5+num_classes (we can drop feature maps if 1 class)
280 21 223.0 10.6 0.4 output_ = output.view(bsize, self.num_anchors, -1, h * w)
281 21 1005.0 47.9 1.8 output_[:, :, 0, :].sigmoid_().add_(lin_x).div_(w) # X center
282 21 707.0 33.7 1.3 output_[:, :, 1, :].sigmoid_().add_(lin_y).div_(h) # Y center
283 21 761.0 36.2 1.4 output_[:, :, 2, :].exp_().mul_(anchor_w).div_(w) # Width
284 21 585.0 27.9 1.1 output_[:, :, 3, :].exp_().mul_(anchor_h).div_(h) # Height
285 21 309.0 14.7 0.6 output_[:, :, 4, :].sigmoid_() # Box score
286
287 # Compute class_score
288 21 101.0 4.8 0.2 if self.num_classes > 1:
289 21 139.0 6.6 0.3 if torch.__version__.startswith('0.3'):
290 cls_scores = torch.nn.functional.softmax(
291 Variable(output_[:, :, 5:, :], volatile=True), 2).data
292 else:
293 21 125.0 6.0 0.2 cls_scores = torch.nn.functional.softmax(
294 21 1123.0 53.5 2.1 output_[:, :, 5:, :], 2)
295 21 587.0 28.0 1.1 cls_max, cls_max_idx = torch.max(cls_scores, 2)
296 21 428.0 20.4 0.8 cls_max.mul_(output_[:, :, 4, :])
297 else:
298 cls_max = output_[:, :, 4, :]
299 cls_max_idx = torch.zeros_like(cls_max)
300
301 # Save detection if conf*class_conf is higher than threshold
302
303 21 95.0 4.5 0.2 if mode == 0:
304 output_ = output_.cpu()
305 cls_max = cls_max.cpu()
306 cls_max_idx = cls_max_idx.cpu()
307 boxes = []
308 for b in range(bsize):
309 box_batch = []
310 for a in range(self.num_anchors):
311 for i in range(h * w):
312 if cls_max[b, a, i] > self.conf_thresh:
313 box_batch.append([
314 output_[b, a, 0, i],
315 output_[b, a, 1, i],
316 output_[b, a, 2, i],
317 output_[b, a, 3, i],
318 cls_max[b, a, i],
319 cls_max_idx[b, a, i]
320 ])
321 box_batch = torch.Tensor(box_batch)
322 boxes.append(box_batch)
323 21 89.0 4.2 0.2 elif mode == 1 or mode == 2:
324 # Save detection if conf*class_conf is higher than threshold
325 21 572.0 27.2 1.1 flags = cls_max > self.conf_thresh
326 21 272.0 13.0 0.5 flat_flags = flags.view(-1)
327
328 21 1915.0 91.2 3.5 if not np.any(flat_flags):
329 return [torch.FloatTensor([]) for _ in range(bsize)]
330
331 # number of potential detections per batch item
332 21 797.0 38.0 1.5 item_size = np.prod(flags.shape[1:])
333 21 108.0 5.1 0.2 slices = [slice((item_size * i), (item_size * (i + 1)))
334 21 415.0 19.8 0.8 for i in range(bsize)]
335 # number of detections per batch item (prepended with a zero)
336 21 108.0 5.1 0.2 n_dets = torch.stack(
337 21 14893.0 709.2 27.4 [flat_flags[0].long() * 0] + [flat_flags[sl].long().sum() for sl in slices])
338 # indices of splits between filtered detections
339 21 1523.0 72.5 2.8 filtered_split_idxs = torch.cumsum(n_dets, dim=0)
340
341 # Do actual filtering of detections by confidence thresh
342 21 710.0 33.8 1.3 flat_coords = output_.transpose(2, 3)[..., 0:4].clone().view(-1, 4)
343 21 159.0 7.6 0.3 flat_class_max = cls_max.view(-1)
344 21 167.0 8.0 0.3 flat_class_idx = cls_max_idx.view(-1)
345
346 21 2837.0 135.1 5.2 coords = flat_coords[flat_flags]
347 21 1990.0 94.8 3.7 scores = flat_class_max[flat_flags]
348 21 1443.0 68.7 2.7 cls_idxs = flat_class_idx[flat_flags]
349
350 21 251.0 12.0 0.5 filtered_dets = torch.cat([coords, scores[:, None],
351 21 1032.0 49.1 1.9 cls_idxs[:, None].float()], dim=1)
352
353 21 100.0 4.8 0.2 boxes2 = []
354 357 3904.0 10.9 7.2 for lx, rx in zip(filtered_split_idxs, filtered_split_idxs[1:]):
355 336 8135.0 24.2 15.0 batch_box = filtered_dets[lx:rx]
356 336 1604.0 4.8 2.9 boxes2.append(batch_box)
357
358 21 91.0 4.3 0.2 boxes = boxes2
359
360 21 89.0 4.2 0.2 return boxes
Pystone time: 0.094206 s
File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
Function: _clip_boxes at line 139
Line # Hits Time Per Hit % Time Line Contents
==============================================================
139 @profiler.profile
140 def _clip_boxes(self, box):
141 """
142 CommandLine:
143 python ~/code/netharn/netharn/models/yolo2/light_postproc.py GetBoundingBoxes._clip_boxes
144
145 Example:
146 >>> import torch
147 >>> torch.random.manual_seed(0)
148 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
149 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
150 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
151 >>> # Make random boxes for one item in a batch
152 >>> box = torch.randn(7, 6)
153 >>> box[:, 0].sigmoid_()
154 >>> box[:, 1].sigmoid_()
155 >>> box.abs_()
156 >>> new_box = self._clip_boxes(box)
157 >>> box_tlbr = util.Boxes(box.cpu().numpy()[:, 0:4], 'cxywh').as_tlbr().data
158 >>> new_tlbr = util.Boxes(new_box.cpu().numpy()[:, 0:4], 'cxywh').as_tlbr().data
159 >>> #
160 >>> print('old')
161 >>> print(box_tlbr)
162 >>> print('new')
163 >>> print(new_tlbr)
164 >>> #print('trim_w = {}'.format(ub.repr2(trim_w.numpy(), precision=4)))
165 >>> #print('trim_h = {}'.format(ub.repr2(trim_h.numpy(), precision=4)))
166 >>> assert np.all(new_tlbr.T[2] <= 1.01)
167 >>> assert np.all(new_tlbr.T[2] >= -0.01)
168 >>> assert np.all(new_tlbr.T[3] <= 1.01)
169 >>> assert np.all(new_tlbr.T[3] >= -0.01)
170 """
171 336 1332.0 4.0 1.4 if len(box) == 0:
172 return box
173
174 336 6518.0 19.4 6.9 cx, cy, w, h = box.t()[0:4]
175
176 336 7328.0 21.8 7.8 x1 = cx - (w / 2)
177 336 5956.0 17.7 6.3 x2 = cx + (w / 2)
178
179 336 5686.0 16.9 6.0 y1 = cy - (h / 2)
180 336 5466.0 16.3 5.8 y2 = cy + (h / 2)
181
182 336 9010.0 26.8 9.6 trim_w1 = (0 - x1).clamp(0, None)
183 336 5525.0 16.4 5.9 trim_w2 = (x2 - 1).clamp(0, None)
184 # multiply by 2 because we are trimming from both sides
185 336 6106.0 18.2 6.5 trim_w = torch.max(trim_w1, trim_w2) * 2
186
187 336 8363.0 24.9 8.9 trim_h1 = (0 - y1).clamp(0, None)
188 336 5343.0 15.9 5.7 trim_h2 = (y2 - 1).clamp(0, None)
189 336 5673.0 16.9 6.0 trim_h = torch.max(trim_h1, trim_h2) * 2
190
191 336 3806.0 11.3 4.0 new_box = box.clone()
192 336 9629.0 28.7 10.2 new_box[:, 2] = new_box[:, 2] - trim_w
193 336 7962.0 23.7 8.5 new_box[:, 3] = new_box[:, 3] - trim_h
194 336 503.0 1.5 0.5 return new_box
Pystone time: 3.45784 s
File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
Function: _nms at line 362
Line # Hits Time Per Hit % Time Line Contents
==============================================================
362 @profiler.profile
363 def _nms(self, boxes, mode=1):
364 """ Non maximum suppression.
365 Source: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
366
367 Args:
368 boxes (tensor): Bounding boxes from get_detections
369
370 Return:
371 (tensor): Pruned boxes
372
373 CommandLine:
374 python -m netharn.models.yolo2.light_postproc GetBoundingBoxes._nms --profile
375
376 Examples:
377 >>> import torch
378 >>> torch.random.manual_seed(0)
379 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
380 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
381 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.01, nms_thresh=0.5)
382 >>> output = torch.randn(8, 125, 9, 9)
383 >>> boxes_ = self._get_boxes(output.data)
384 >>> from netharn import util
385 >>> boxes = torch.Tensor(boxes_[0])
386 >>> scores = boxes[..., 4:5]
387 >>> classes = boxes[..., 5:6]
388 >>> cxywh = util.Boxes(boxes[..., 0:4], 'cxywh')
389 >>> tlbr = cxywh.as_tlbr()
390 >>> util.non_max_supression(tlbr.data.numpy(), scores.numpy().ravel(), self.nms_thresh)
391 >>> self._nms(boxes, mode=0)
392 >>> self._nms(boxes, mode=1)
393
394 Benchmark:
395 boxes = torch.Tensor(boxes_[0])
396 import ubelt
397 for timer in ubelt.Timerit(100, bestof=10, label='nms0+cpu'):
398 with timer:
399 self._nms(boxes, mode=0)
400
401 for timer in ubelt.Timerit(100, bestof=10, label='nms1+cpu'):
402 with timer:
403 self._nms(boxes, mode=1)
404
405 boxes = boxes.cuda()
406 import ubelt
407 for timer in ubelt.Timerit(100, bestof=10, label='nms0+gpu'):
408 with timer:
409 self._nms(boxes, mode=0)
410
411 for timer in ubelt.Timerit(100, bestof=10, label='nms1+gpu'):
412 with timer:
413 self._nms(boxes, mode=1)
414 """
415 336 2144.0 6.4 0.1 if boxes.numel() == 0:
416 return boxes
417
418 336 3259.0 9.7 0.1 a = boxes[:, :2]
419 336 1763.0 5.2 0.1 b = boxes[:, 2:4]
420 336 27196.0 80.9 0.8 bboxes = torch.cat([a - b / 2, a + b / 2], 1)
421 336 2762.0 8.2 0.1 scores = boxes[:, 4]
422
423 336 1038.0 3.1 0.0 if mode == 1:
424 336 18579.0 55.3 0.5 bboxes = bboxes.cpu().numpy().astype(np.float32)
425 336 12181.0 36.3 0.4 scores = scores.cpu().numpy().astype(np.float32)
426 336 12435.0 37.0 0.4 classes = boxes[..., 5].cpu().numpy().astype(np.int)
427 336 1158.0 3.4 0.0 keep = []
428 7035 57129.0 8.1 1.7 for idxs in ub.group_items(range(len(classes)), classes).values():
429 6699 58671.0 8.8 1.7 cls_boxes = bboxes.take(idxs, axis=0)
430 6699 35854.0 5.4 1.0 cls_scores = scores.take(idxs, axis=0)
431 6699 21426.0 3.2 0.6 cls_keep = util.non_max_supression(cls_boxes, cls_scores,
432 6699 3078737.0 459.6 89.0 self.nms_thresh)
433 6699 70162.0 10.5 2.0 keep.extend(list(ub.take(idxs, cls_keep)))
434 336 4602.0 13.7 0.1 keep = sorted(keep)
435 336 48744.0 145.1 1.4 return boxes[torch.LongTensor(keep)]
436 elif mode == 0 or mode == 2:
437 # if torch.cuda.is_available:
438 # boxes = boxes.cuda()
439
440 x1 = bboxes[:, 0]
441 y1 = bboxes[:, 1]
442 x2 = bboxes[:, 2]
443 y2 = bboxes[:, 3]
444
445 areas = ((x2 - x1) * (y2 - y1))
446 _, order = scores.sort(0, descending=True)
447
448 keep = []
449 while order.numel() > 0:
450 if order.numel() == 1:
451 if torch.__version__.startswith('0.3'):
452 i = order[0]
453 else:
454 i = order.item()
455 i = order.item()
456 keep.append(i)
457 break
458
459 i = order[0]
460 keep.append(i)
461
462 xx1 = x1[order[1:]].clamp(min=x1[i])
463 yy1 = y1[order[1:]].clamp(min=y1[i])
464 xx2 = x2[order[1:]].clamp(max=x2[i])
465 yy2 = y2[order[1:]].clamp(max=y2[i])
466
467 w = (xx2 - xx1).clamp(min=0)
468 h = (yy2 - yy1).clamp(min=0)
469 inter = w * h
470
471 iou = inter / (areas[i] + areas[order[1:]] - inter)
472
473 ids = (iou <= self.nms_thresh).nonzero().squeeze()
474 if ids.numel() == 0:
475 break
476 order = order[ids + 1]
477 return boxes[torch.LongTensor(keep)]
Pystone time: 3.78105 s
File: /home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py
Function: __call__ at line 44
Line # Hits Time Per Hit % Time Line Contents
==============================================================
44 @profiler.profile
45 def __call__(self, network_output, mode=1):
46 """ Compute bounding boxes after thresholding and nms
47
48 network_output (torch.autograd.Variable): Output tensor from the lightnet network
49
50 # Examples:
51 # >>> import torch
52 # >>> torch.random.manual_seed(0)
53 # >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
54 # >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
55 # >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
56 # >>> output = torch.randn(8, 125, 9, 9)
57 # >>> boxes = self(output)
58 # >>> assert len(boxes) == 8
59 # >>> assert all(b.shape[1] == 6 for b in boxes)
60
61 CommandLine:
62 python -m netharn.models.yolo2.light_postproc GetBoundingBoxes.__call__:1 --profile
63 python -m netharn.models.yolo2.light_postproc GetBoundingBoxes.__call__:2 --profile
64
65 Example:
66 >>> import torch
67 >>> torch.random.manual_seed(0)
68 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
69 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
70 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
71 >>> import ubelt
72 >>> output = torch.randn(16, 125, 9, 9)
73 >>> output = output.cuda()
74 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+gpu'):
75 >>> output_ = output.clone()
76 >>> with timer:
77 >>> self(output_, mode=0)
78
79 Example:
80 >>> import torch
81 >>> torch.random.manual_seed(0)
82 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
83 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
84 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
85 >>> import ubelt
86 >>> output = torch.randn(16, 125, 9, 9)
87 >>> output = output.cuda()
88 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+gpu'):
89 >>> output_ = output.clone()
90 >>> with timer:
91 >>> self(output_, mode=1)
92
93 Benchmark:
94 >>> import torch
95 >>> torch.random.manual_seed(0)
96 >>> anchors = np.array([1.3221,1.73145,3.19275,4.00944,5.05587,
97 >>> 8.09892,9.47112,4.84053,11.2364,10.0071])
98 >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
99 >>> import ubelt
100 >>> output = torch.randn(16, 125, 9, 9)
101 >>> #
102 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+cpu'):
103 >>> output_ = output.clone()
104 >>> with timer:
105 >>> self(output_, mode=0)
106 >>> #
107 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+cpu'):
108 >>> output_ = output.clone()
109 >>> with timer:
110 >>> self(output_, mode=1)
111 >>> #
112 >>> output = output.cuda()
113 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode0+gpu'):
114 >>> output_ = output.clone()
115 >>> with timer:
116 >>> self(output_, mode=0)
117 >>> #
118 >>> for timer in ubelt.Timerit(21, bestof=3, label='mode1+gpu'):
119 >>> output_ = output.clone()
120 >>> with timer:
121 >>> self(output_, mode=1)
122 >>> #for timer in ubelt.Timerit(21, bestof=3, label='mode2+gpu'):
123 >>> # output_ = output.clone()
124 >>> # with timer:
125 >>> # self(output_, mode=2)
126
127 %timeit self(output.data, mode=0)
128 %timeit self(output.data, mode=1)
129 %timeit self(output.data, mode=2)
130 """
131 21 65289.0 3109.0 1.7 boxes = self._get_boxes(network_output.data, mode=mode)
132 21 3609713.0 171891.1 95.5 boxes = [self._nms(box, mode=mode) for box in boxes]
133
134 # force all boxes to be inside the image
135 21 105953.0 5045.4 2.8 boxes = [self._clip_boxes(box) for box in boxes]
136 21 51.0 2.4 0.0 postout = boxes
137 21 46.0 2.2 0.0 return postout
-1.00 seconds - None :None:None
0.05 seconds - GetBoundingBoxes._get_boxes :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:202
0.09 seconds - GetBoundingBoxes._clip_boxes:/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:139
3.46 seconds - GetBoundingBoxes._nms :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:362
3.78 seconds - GetBoundingBoxes.__call__ :/home/joncrall/code/netharn/netharn/models/yolo2/light_postproc.py:44
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment