Skip to content

Instantly share code, notes, and snippets.

@UnaNancyOwen
Last active December 17, 2023 04:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save UnaNancyOwen/802b724b46977115d94332b5971775b4 to your computer and use it in GitHub Desktop.
Save UnaNancyOwen/802b724b46977115d94332b5971775b4 to your computer and use it in GitHub Desktop.
Object Detection using YOLOX inferred by OpenCV DNN module
import cv2
import numpy as np
from yolox import DetectionModel
# params
# Path of the model file handed to DetectionModel (loaded there with cv2.dnn.readNet).
WEIGHTS = "yolox_s.onnx"
# Network input size handed to DetectionModel; the class compares element 0
# with the image row count and element 1 with the column count.
INPUT_SIZE = (640, 640)
# Number of colors generated for drawing; detections index this palette by class id.
NUM_CLASSES = 80
# Score threshold forwarded to DetectionModel.detect (used by its NMS step).
SCORE_THRESHOLD = 0.6
# IoU threshold forwarded to DetectionModel.detect (used by its NMS step).
IOU_THRESHOLD = 0.4
# get random colors
def get_colors(num):
    """Return ``num`` reproducible pseudo-random colors.

    Args:
        num: Number of colors to generate.

    Returns:
        A list of ``num`` colors, each a list of three ints in ``[0, 255]``.
        The same ``num`` always yields the same colors.
    """
    # Use a private generator seeded with 0 instead of np.random.seed(0):
    # it produces the same sequence but does not reset the global NumPy
    # random state that other code in the process may rely on.
    rng = np.random.RandomState(0)
    return [rng.randint(0, 256, [3]).astype(np.uint8).tolist() for _ in range(num)]
# main
def main():
    """Run YOLOX detection on ``dog.jpg`` and display the detected boxes.

    Raises:
        IOError: If the image cannot be read.
    """
    # read image
    frame = cv2.imread("dog.jpg")
    if frame is None:
        raise IOError("can't read image!")

    # create the detector and run it with the module-level parameters
    detector = DetectionModel(WEIGHTS, INPUT_SIZE)
    class_ids, scores, boxes = detector.detect(frame, SCORE_THRESHOLD, IOU_THRESHOLD)

    # draw one rectangle per detection, colored by its class id
    palette = get_colors(NUM_CLASSES)
    for rect, _score, label in zip(boxes, scores, class_ids):
        cv2.rectangle(frame, rect, palette[label], 2, cv2.LINE_AA)

    # show image and wait for a key press
    cv2.imshow("image", frame)
    cv2.waitKey(0)


if __name__ == '__main__':
    main()
import cv2
import numpy as np
# detection model class for yolox
class DetectionModel:
    """YOLOX object detector running through the OpenCV DNN module.

    ``input_size`` is interpreted as ``(height, width)`` of the network
    input: element 0 is compared with the image row count and element 1
    with the column count throughout the class.
    """

    def __init__(self, weight, input_size=(640, 640)):
        """Load the network and precompute the decoding grids.

        Args:
            weight: Path of the model file passed to ``cv2.dnn.readNet``.
            input_size: Network input size as ``(height, width)``.
        """
        self.__initialize(weight, input_size)

    def __initialize(self, weight, input_size):
        """Read the network, select backend/target and build the grids."""
        self.net = cv2.dnn.readNet(weight)
        self.input_size = input_size
        self.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
        self.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
        strides = [8, 16, 32]
        self.grids, self.expanded_strides = self.__create_grids_and_expanded_strides(strides)

    def __create_grids_and_expanded_strides(self, strides):
        """Build per-anchor grid offsets and strides for all output levels.

        Args:
            strides: Downsampling factor of each output level.

        Returns:
            ``(grids, expanded_strides)`` with shapes ``(1, N, 2)`` and
            ``(1, N, 1)``, where ``N`` is the total number of grid cells.
            ``grids[..., 0]`` is the x (column) index and ``grids[..., 1]``
            the y (row) index of each cell.
        """
        input_height, input_width = self.input_size[0], self.input_size[1]
        grids = []
        expanded_strides = []
        for stride in strides:
            hsize = input_height // stride
            wsize = input_width // stride
            # meshgrid's first argument runs along x, so the width must come
            # first; passing the height first only works for square inputs.
            # Cells are enumerated row by row (x varies fastest), matching
            # the YOLOX reference decoder.
            xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
            grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
            grids.append(grid)
            expanded_strides.append(np.full((*grid.shape[:2], 1), stride))
        return np.concatenate(grids, 1), np.concatenate(expanded_strides, 1)

    def setPreferableBackend(self, backend):
        """Forward the preferred computation backend to the network."""
        self.net.setPreferableBackend(backend)

    def setPreferableTarget(self, target):
        """Forward the preferred target device to the network."""
        self.net.setPreferableTarget(target)

    def detect(self, image, score_threshold, iou_threshold):
        """Detect objects in ``image``.

        Args:
            image: Input image array; its first two shape entries are used
                as height and width.
            score_threshold: Minimum score passed to non-maximum suppression.
            iou_threshold: IoU threshold passed to non-maximum suppression.

        Returns:
            ``(class_ids, scores, boxes)`` of the kept detections. Boxes are
            integer ``[x, y, width, height]`` in original image coordinates.
            All three are empty lists when nothing is detected.
        """
        # Kept as an attribute because the original implementation exposed it.
        self.image_shape = image.shape
        input_blob, resize_ratio = self.__preprocess(image)
        output_blob = self.__predict(input_blob)
        boxes, scores, class_ids = self.__postprocess(output_blob, resize_ratio)
        boxes, scores, class_ids = self.__nms(boxes, scores, class_ids, score_threshold, iou_threshold)
        return class_ids, scores, boxes

    def __preprocess(self, image):
        """Resize ``image`` keeping its aspect ratio and pad it to the input size.

        Returns:
            ``(input_blob, resize_ratio)``; ``resize_ratio`` is the scale
            applied to the image and is needed to map boxes back.
        """
        input_height, input_width = self.input_size[0], self.input_size[1]
        resize_ratio = min(input_height / image.shape[0], input_width / image.shape[1])
        resized_image = cv2.resize(image, dsize=None, fx=resize_ratio, fy=resize_ratio)
        # The resized image goes in the top-left corner; the rest is filled with 114.
        padded_image = np.full((input_height, input_width, 3), 114, dtype=np.uint8)
        padded_image[: resized_image.shape[0], : resized_image.shape[1]] = resized_image
        # blobFromImage takes its size as (width, height); passing
        # (height, width) would re-resize and distort non-square inputs.
        input_blob = cv2.dnn.blobFromImage(
            padded_image, 1.0, (input_width, input_height), (0.0, 0.0, 0.0), True, False
        )
        return input_blob, resize_ratio

    def __predict(self, input_blob):
        """Run a forward pass and return the first unconnected output layer."""
        self.net.setInput(input_blob)
        output_layer = self.net.getUnconnectedOutLayersNames()[0]  # "output"
        return self.net.forward(output_layer)

    def __postprocess(self, output_blob, resize_ratio):
        """Decode the raw network output into boxes, scores and class ids.

        Only the first batch entry is decoded. Each row is read as
        ``[cx, cy, w, h, objectness, class scores...]``.

        Returns:
            ``(boxes_xywh, scores, class_ids)`` where boxes are
            ``[x, y, width, height]`` with a top-left origin, scaled back
            to the original image by ``resize_ratio``.
        """
        predictions = output_blob[0]
        grids = self.grids[0]
        strides = self.expanded_strides[0]
        # Decode into new arrays so the network output blob is left untouched.
        centers = (predictions[:, :2] + grids) * strides
        sizes = np.exp(predictions[:, 2:4]) * strides
        boxes_xywh = np.concatenate((centers - sizes * 0.5, sizes), axis=1)
        # Cast back to the network's dtype so NMS receives the same type as before.
        boxes_xywh = (boxes_xywh / resize_ratio).astype(predictions.dtype)
        # Final score is objectness times the best class score.
        scores = predictions[:, 4:5] * predictions[:, 5:]
        class_ids = scores.argmax(1)
        scores = scores[np.arange(len(class_ids)), class_ids]
        return boxes_xywh, scores, class_ids

    def __nms(self, boxes, scores, class_ids, score_threshold, iou_threshold):
        """Apply per-class non-maximum suppression and gather the survivors.

        Returns:
            ``(keep_boxes, keep_scores, keep_class_ids)``; ``keep_boxes`` is
            an integer array (values truncated toward zero), the other two
            are lists. All three are empty lists when nothing is kept.
        """
        indices = cv2.dnn.NMSBoxesBatched(boxes, scores, class_ids, score_threshold, iou_threshold)  # OpenCV 4.7.0 or later
        if len(indices) == 0:
            return [], [], []
        indices = np.asarray(indices).reshape(-1)
        keep_boxes = boxes[indices].astype(int)
        keep_scores = list(scores[indices])
        keep_class_ids = list(class_ids[indices])
        return keep_boxes, keep_scores, keep_class_ids
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment