Created
January 19, 2024 18:33
-
-
Save hu8813/7699b346e02951dff871083cf6248232 to your computer and use it in GitHub Desktop.
prepare.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import cv2 | |
import time | |
import onnxruntime | |
class YOLOv8:
    """Run a YOLOv8 ONNX model through ONNX Runtime and post-process its output.

    Calling an instance is shorthand for :meth:`detect_objects`.  The boxes,
    scores and class ids of the most recent detection are kept on the
    instance so that :meth:`draw_detections` can render them afterwards.
    """

    def __init__(self, path, conf_thres=0.3, iou_thres=0.3):
        """Store the thresholds and load the model.

        Args:
            path: Model location handed to ``onnxruntime.InferenceSession``.
            conf_thres: Predictions whose best class score is not strictly
                above this value are discarded.
            iou_thres: IoU threshold passed to ``multiclass_nms``.
        """
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres

        # Start with empty results so draw_detections() is usable (and draws
        # nothing) before the first detection has been run.
        self.boxes, self.scores, self.class_ids = [], [], []

        # Initialize model
        self.initialize_model(path)

    def __call__(self, image):
        """Detect objects in *image*; see :meth:`detect_objects`."""
        return self.detect_objects(image)

    def initialize_model(self, path):
        """Create the inference session and cache its input/output details.

        TensorRT is listed first when available; after it comes CUDA when
        available, otherwise the CPU provider.
        """
        available_providers = onnxruntime.get_available_providers()

        providers = []
        if 'TensorrtExecutionProvider' in available_providers:
            providers.append('TensorrtExecutionProvider')
        if 'CUDAExecutionProvider' in available_providers:
            providers.append('CUDAExecutionProvider')
        else:
            providers.append('CPUExecutionProvider')

        self.session = onnxruntime.InferenceSession(path, providers=providers)
        self.get_input_details()
        self.get_output_details()

    def detect_objects(self, image):
        """Run the full pipeline on one image.

        Args:
            image: Image array; it is converted with ``cv2.COLOR_BGR2RGB``,
                so BGR channel order is expected.

        Returns:
            ``(boxes, scores, class_ids)`` as produced by
            :meth:`process_output`; the same values are stored on ``self``.
        """
        input_tensor = self.prepare_input(image)
        outputs = self.inference(input_tensor)
        self.boxes, self.scores, self.class_ids = self.process_output(outputs)
        return self.boxes, self.scores, self.class_ids

    def prepare_input(self, image):
        """Convert *image* into the feed dict expected by :meth:`inference`.

        Records the original image size on ``self`` (used later to rescale
        boxes), converts BGR to RGB, resizes to 320x320, scales pixel values
        to ``[0, 1]`` and adds a leading batch axis.

        Returns:
            A one-entry dict mapping the model's first input name to a
            ``float32`` array of shape ``(1, 320, 320, channels)``.
        """
        self.img_height, self.img_width = image.shape[:2]

        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Resize input image.  The size is fixed here; adjust it to match the
        # model's input shape.
        input_img = cv2.resize(input_img, (320, 320))

        # Scale input pixel values to 0 to 1
        input_img = input_img / 255

        # No HWC -> CHW transpose is applied: the tensor stays channels-last.
        input_tensor = np.expand_dims(input_img, axis=0).astype(np.float32)

        # Key the feed by the name the session reports rather than by a
        # hard-coded string, so differently exported models work as well.
        return {self.input_names[0]: input_tensor}

    def inference(self, input_tensor):
        """Run the session and return a list holding the first model output.

        Args:
            input_tensor: Feed dict as returned by :meth:`prepare_input`
                (input name -> array), despite the parameter's name.
        """
        return self.session.run([self.output_names[0]], input_tensor)

    def process_output(self, output):
        """Turn the raw model output into filtered detections.

        ``output[0]`` is squeezed and transposed so that every row is one
        prediction: the first four columns are the box (centre x, centre y,
        width, height) and the remaining columns are per-class scores.

        Returns:
            ``(boxes, scores, class_ids)`` after confidence filtering and
            per-class non-maximum suppression, or three empty lists when no
            prediction is above the confidence threshold.
        """
        predictions = np.squeeze(output[0]).T

        # Filter out object confidence scores below threshold
        scores = np.max(predictions[:, 4:], axis=1)
        confident = scores > self.conf_threshold
        predictions = predictions[confident, :]
        scores = scores[confident]

        if scores.size == 0:
            return [], [], []

        # Get the class with the highest confidence
        class_ids = np.argmax(predictions[:, 4:], axis=1)

        # Get bounding boxes for each object
        boxes = self.extract_boxes(predictions)

        # Apply non-maxima suppression
        indices = multiclass_nms(boxes, scores, class_ids, self.iou_threshold)

        return boxes[indices], scores[indices], class_ids[indices]

    def extract_boxes(self, predictions):
        """Return the boxes of *predictions* in image pixels, xyxy format."""
        # Extract boxes from predictions
        boxes = predictions[:, :4]

        # Scale boxes to original image dimensions
        boxes = self.rescale_boxes(boxes)

        # Convert boxes to xyxy format
        return xywh2xyxy(boxes)

    def rescale_boxes(self, boxes):
        """Map boxes from model-input coordinates to original-image coordinates.

        Each coordinate is divided by the model input width/height and then
        multiplied by the original image width/height.  The result is a new
        ``float32`` array.
        """
        input_shape = np.array([self.input_width, self.input_height,
                                self.input_width, self.input_height])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([self.img_width, self.img_height,
                           self.img_width, self.img_height])
        return boxes

    def draw_detections(self, image, draw_scores=True, mask_alpha=0.4):
        """Draw the most recent detections on a copy of *image*.

        ``draw_scores`` is accepted for interface compatibility but is not
        used: it is not forwarded to the module-level ``draw_detections``.
        """
        return draw_detections(image, self.boxes, self.scores,
                               self.class_ids, mask_alpha)

    def get_input_details(self):
        """Cache the session's input names and the first input's shape."""
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]

        self.input_shape = model_inputs[0].shape
        # NOTE(review): indices 2 and 3 are height and width only for an
        # NCHW-shaped input, while prepare_input() feeds a channels-last
        # tensor -- confirm against the actual model before relying on
        # input_width / input_height in rescale_boxes().
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        """Cache the session's output names."""
        model_outputs = self.session.get_outputs()
        self.output_names = [model_output.name for model_output in model_outputs]
# Labels looked up by predicted class id when drawing detections.
class_names = ['cars', 'light']

# One colour per class: a (len(class_names), 3) array of floats drawn
# uniformly from [0, 255).  The generator is seeded so the colours are the
# same on every run.
rng = np.random.default_rng(3)
colors = rng.uniform(0, 255, size=(len(class_names), 3))
def nms(boxes, scores, iou_threshold):
    """Greedy non-maximum suppression over one set of boxes.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is not below ``iou_threshold``.

    Args:
        boxes: Array of shape ``(n, 4)`` in ``(x1, y1, x2, y2)`` format.
        scores: Array of ``n`` scores, one per box.
        iou_threshold: Boxes with IoU >= this value against a kept box are
            suppressed.

    Returns:
        List of indices into ``boxes`` of the kept boxes, best score first.
    """
    # Sort by score, highest first
    sorted_indices = np.argsort(scores)[::-1]

    keep_boxes = []
    while sorted_indices.size > 0:
        # Pick the highest-scoring remaining box
        box_id = sorted_indices[0]
        keep_boxes.append(box_id)

        remaining = sorted_indices[1:]
        if remaining.size == 0:
            # Nothing left to compare against.
            break

        # Compute IoU of the picked box with the rest
        ious = compute_iou(boxes[box_id, :], boxes[remaining, :])

        # Keep only the boxes that overlap the picked one less than the threshold
        sorted_indices = remaining[ious < iou_threshold]

    return keep_boxes
def multiclass_nms(boxes, scores, class_ids, iou_threshold):
    """Apply :func:`nms` independently to the boxes of each class.

    Args:
        boxes: Array of shape ``(n, 4)`` in ``(x1, y1, x2, y2)`` format.
        scores: Array of ``n`` scores.
        class_ids: Array of ``n`` class ids; boxes are only compared with
            boxes of the same class.
        iou_threshold: Threshold forwarded to :func:`nms`.

    Returns:
        List of indices into ``boxes`` of the kept boxes, grouped by class
        in ascending class-id order.
    """
    keep_boxes = []
    for class_id in np.unique(class_ids):
        # Positions of this class's boxes in the full arrays
        class_indices = np.where(class_ids == class_id)[0]
        class_boxes = boxes[class_indices, :]
        class_scores = scores[class_indices]

        class_keep_boxes = nms(class_boxes, class_scores, iou_threshold)
        # nms() returns positions within the class subset; map them back
        keep_boxes.extend(class_indices[class_keep_boxes])

    return keep_boxes
def compute_iou(box, boxes):
    """Return the IoU of one box against each of several boxes.

    Args:
        box: Sequence of four values ``(x1, y1, x2, y2)``.
        boxes: Array of shape ``(n, 4)`` in the same format.

    Returns:
        Float array of ``n`` IoU values.  Pairs whose union area is not
        positive (for example two zero-area boxes) get an IoU of 0 instead
        of a NaN from a division by zero.
    """
    # Corners of the intersection rectangle
    xmin = np.maximum(box[0], boxes[:, 0])
    ymin = np.maximum(box[1], boxes[:, 1])
    xmax = np.minimum(box[2], boxes[:, 2])
    ymax = np.minimum(box[3], boxes[:, 3])

    # Compute intersection area (zero when the boxes do not overlap)
    intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)

    # Compute union area
    box_area = (box[2] - box[0]) * (box[3] - box[1])
    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    union_area = box_area + boxes_area - intersection_area

    # Compute IoU; entries where the division is skipped keep the initial 0
    iou = np.zeros(union_area.shape,
                   dtype=np.result_type(union_area, np.float32))
    np.divide(intersection_area, union_area, out=iou, where=union_area > 0)

    return iou
def xywh2xyxy(x):
    """Convert boxes from centre format to corner format.

    Args:
        x: Array-like whose last axis holds ``(x, y, w, h)``, where
            ``(x, y)`` is the box centre.

    Returns:
        A new array of the same shape and dtype holding
        ``(x1, y1, x2, y2)``; the input is not modified.  Because the dtype
        is kept, integer input truncates the half-width/half-height terms.
    """
    x = np.asarray(x)
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y
def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
    """Return a copy of *image* with masks, boxes and labels drawn on it.

    Args:
        image: Image array; it is left untouched.
        boxes: Iterable of ``(x1, y1, x2, y2)`` arrays.
        scores: Iterable of scores, shown as an integer percentage.
        class_ids: Iterable of indices into ``class_names`` / ``colors``.
        mask_alpha: Opacity of the filled box overlay, forwarded to
            ``draw_masks``.
    """
    det_img = image.copy()

    # Font scale and stroke thickness grow with the shorter image side.
    shorter_side = min(image.shape[:2])
    font_size = shorter_side * 0.0006
    text_thickness = int(shorter_side * 0.001)

    det_img = draw_masks(det_img, boxes, class_ids, mask_alpha)

    # Draw bounding boxes and labels of detections
    for class_id, box, score in zip(class_ids, boxes, scores):
        color = colors[class_id]

        draw_box(det_img, box, color)

        label = class_names[class_id]
        caption = f'{label} {int(score * 100)}%'
        draw_text(det_img, caption, box, color, font_size, text_thickness)

    return det_img
def draw_box(image, box, color=(0, 0, 255), thickness=2):
    """Draw the outline of *box* on *image* in place and return the image.

    Args:
        image: Image array that is drawn on.
        box: Array ``(x1, y1, x2, y2)``; values are cast to ``int``.
        color: Colour passed to ``cv2.rectangle``.
        thickness: Outline thickness passed to ``cv2.rectangle``.
    """
    x1, y1, x2, y2 = box.astype(int)
    return cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)
def draw_text(image, text, box, color=(0, 0, 255), font_size=0.001, text_thickness=2):
    """Draw *text* on a filled label anchored at the top-left corner of *box*.

    A rectangle filled with *color* and sized to the rendered text is drawn
    upwards from ``(x1, y1)``; the text itself is drawn in white on top.

    Args:
        image: Image array that is drawn on in place.
        text: Caption to render.
        box: Array ``(x1, y1, x2, y2)``; only the first corner is used.
        color: Fill colour of the label background.
        font_size: Font scale for ``cv2.FONT_HERSHEY_SIMPLEX``.
        text_thickness: Stroke thickness of the text.

    Returns:
        The image returned by ``cv2.putText``.
    """
    x1, y1 = box.astype(int)[:2]

    (tw, th), _ = cv2.getTextSize(text=text, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                  fontScale=font_size, thickness=text_thickness)
    # Add 20% to the text height so the background has some padding.
    th = int(th * 1.2)

    cv2.rectangle(image, (x1, y1), (x1 + tw, y1 - th), color, -1)

    return cv2.putText(image, text, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX,
                       font_size, (255, 255, 255), text_thickness, cv2.LINE_AA)
def draw_masks(image: np.ndarray, boxes: np.ndarray, classes: np.ndarray, mask_alpha: float = 0.3) -> np.ndarray:
    """Blend a filled, class-coloured rectangle over every box.

    Args:
        image: Image array; it is not modified.
        boxes: Iterable of ``(x1, y1, x2, y2)`` arrays.
        classes: Iterable of indices into the module-level ``colors``.
        mask_alpha: Weight of the filled overlay; the original image gets
            weight ``1 - mask_alpha``.

    Returns:
        The blended image produced by ``cv2.addWeighted``.
    """
    mask_img = image.copy()

    # Paint every detection's box as a solid rectangle on the overlay copy
    for box, class_id in zip(boxes, classes):
        color = colors[class_id]

        x1, y1, x2, y2 = box.astype(int)

        # Draw fill rectangle in mask image
        cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)

    return cv2.addWeighted(mask_img, mask_alpha, image, 1 - mask_alpha, 0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment