Skip to content

Instantly share code, notes, and snippets.

@miladfa7
Last active September 11, 2024 14:31
Show Gist options
  • Save miladfa7/641e95719e97d59ce0dbe05ceecde0d6 to your computer and use it in GitHub Desktop.
Save miladfa7/641e95719e97d59ce0dbe05ceecde0d6 to your computer and use it in GitHub Desktop.
This code performs Non-Maximum Suppression (NMS) to filter object detection predictions and visualize the results. numpy_nms function: Implements NMS using NumPy. apply_nms function: Applies NMS using both the custom NumPy-based NMS and PyTorch’s built-in NMS (torchvision.ops.nms).
import numpy as np
import os
import cv2
import torchvision
import torch
import json
def numpy_nms(boxes, scores, iou_threshold, offset=1.0):
    """
    Perform greedy Non-Maximum Suppression (NMS) using NumPy.

    Boxes are visited from highest to lowest score; each visited box is kept
    and every remaining box whose IoU with it exceeds ``iou_threshold`` is
    discarded.

    Args:
        boxes (array-like): Bounding boxes with shape [N, 4], each given as
            [x1, y1, x2, y2]. Converted to float32 internally.
        scores (array-like): Scores with shape [N].
        iou_threshold (float): Boxes whose IoU with an already-kept box is
            strictly greater than this value are suppressed.
        offset (float): Value added to every width/height. The default of 1
            treats coordinates as inclusive pixel indices (a box with
            x1 == x2 is one pixel wide). Pass 0 to treat coordinates as
            continuous, so that touching boxes have zero overlap.

    Returns:
        numpy.ndarray: Integer indices (dtype int64) of the kept boxes,
        ordered by descending score. Empty input yields an empty integer
        array, so the result can always be used to index the inputs.
    """
    boxes = np.asarray(boxes, dtype=np.float32)
    scores = np.asarray(scores)

    # Nothing to suppress. Return an *integer* array: an empty float array
    # (what np.array([]) produces) is rejected by NumPy when used as an index.
    if boxes.size == 0 or scores.size == 0:
        return np.empty(0, dtype=np.int64)

    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1 + offset) * (y2 - y1 + offset)

    # Candidate indices, best score first.
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]  # highest-scoring box still in play
        keep.append(i)
        rest = order[1:]

        # Corners of the intersection between box i and every other candidate.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # Clamp at zero so disjoint boxes contribute no intersection area.
        w = np.maximum(0, xx2 - xx1 + offset)
        h = np.maximum(0, yy2 - yy1 + offset)
        intersection = w * h

        iou = intersection / (areas[i] + areas[rest] - intersection)

        # Only candidates that do not overlap box i too much survive.
        order = rest[iou <= iou_threshold]

    return np.array(keep, dtype=np.int64)
def apply_nms(
    predictions: np.ndarray, iou_threshold: float, num_classes: int = 1
) -> "tuple[np.ndarray, torch.Tensor]":
    """
    Run non-max suppression on detections with both NumPy and torchvision.

    The same predictions are filtered twice: once with the local
    ``numpy_nms`` and once with ``torchvision.ops.nms``, so the two
    implementations can be compared. The two results are not guaranteed to
    be identical: ``numpy_nms`` adds 1 to box widths/heights by default,
    and the IoU convention of ``torchvision.ops.nms`` may differ (see its
    documentation).

    Args:
        predictions (np.ndarray): Array of shape [N, 5] whose rows are
            `(x_min, y_min, x_max, y_max, score)`. An empty input is
            treated as zero detections.
        iou_threshold (float): The intersection-over-union threshold
            to use for non-maximum suppression.
        num_classes (int): Number of classes (not used in this implementation).

    Returns:
        tuple: ``(filtered_numpy, filtered_tensor)`` where ``filtered_numpy``
        is an ``np.ndarray`` of the rows kept by ``numpy_nms`` and
        ``filtered_tensor`` is a ``torch.Tensor`` of the rows kept by
        ``torchvision.ops.nms``. Both use the input row format.
    """
    predictions = np.asarray(predictions)
    if predictions.size == 0:
        # Give empty input a 2-D shape so the column slicing below works.
        predictions = predictions.reshape(0, 5)

    # NumPy path. Force an integer index dtype so an empty result can still
    # be used to index `predictions`.
    indices_numpy = np.asarray(
        numpy_nms(predictions[:, :4], predictions[:, 4], iou_threshold),
        dtype=np.intp,
    )
    filtered_predictions_numpy = predictions[indices_numpy]

    # torchvision path, operating on a tensor view of the same data.
    predictions_tensor = torch.from_numpy(predictions)
    indices_tensor = torchvision.ops.nms(
        predictions_tensor[:, :4], predictions_tensor[:, 4], iou_threshold
    )
    filtered_predictions_tensor = predictions_tensor[indices_tensor]

    return filtered_predictions_numpy, filtered_predictions_tensor
if __name__ == "__main__":
    # Demo: draw all raw detections on one copy of the image, the detections
    # that survive NumPy NMS on another, and save the two stacked vertically.
    image_test = cv2.imread("test.jpg")
    if image_test is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising, so fail here with a clear message.
        raise FileNotFoundError("Could not read image 'test.jpg'")

    # Rows are (x_min, y_min, x_max, y_max, score).
    predictions = [
        [157, 504, 170, 531, 0.8604464],
        [1053, 504, 1067, 535, 0.9075985],
        [1056, 445, 1067, 468, 0.86464584],
        [710, 430, 720, 447, 0.8747747],
        [1101, 460, 1109, 485, 0.89883935],
        [758, 409, 769, 424, 0.85449785],
        [900, 426, 910, 447, 0.8431163],
        [1279, 454, 1290, 479, 0.9011379],
        [1184, 367, 1191, 381, 0.80410206],
        [1084, 397, 1092, 416, 0.8952946],
        [728, 414, 737, 430, 0.73510545],
        [75, 492, 86, 518, 0.831687],
        [1858, 473, 1869, 499, 0.84012896],
        [811, 483, 823, 506, 0.894104],
        [1021, 452, 1031, 472, 0.42788184],
        [880, 363, 887, 379, 0.50802994],
        [1018, 449, 1028, 470, 0.77440435],
        [728, 414, 736, 430, 0.4596147],
        [734, 411, 743, 425, 0.4721304],
        [1037, 996, 1066, 1046, 0.2523128],
    ]
    predictions = np.array(predictions)

    # Apply NMS to the model predictions (NumPy and torchvision variants).
    filtered_predictions_numpy, filtered_predictions_tensor = apply_nms(
        predictions, iou_threshold=0.2
    )

    # All raw predictions.
    img1 = image_test.copy()
    for xmin, ymin, xmax, ymax, score in predictions:
        cv2.rectangle(img1, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 0, 0), 1)

    # Predictions kept by the NumPy NMS.
    img2 = image_test.copy()
    for xmin, ymin, xmax, ymax, score in filtered_predictions_numpy:
        cv2.rectangle(img2, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), 1)

    # Before (top) and after (bottom) in a single output image.
    merge_img = np.vstack([img1, img2])
    if not cv2.imwrite("merged_img.jpg", merge_img):
        raise OSError("Could not write image 'merged_img.jpg'")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment