Last active
February 17, 2023 13:06
-
-
Save Neutree/d23796f383af104c78a1a0722d73cf6b to your computer and use it in GitHub Desktop.
yolov5 code for rk3588/rk3588s (board rock5b)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# yolov5 code for rk3588/rk3588s (board rock5b)
# @author neucrack neucrack.com
# @license MIT
import sys | |
import cv2 | |
import numpy as np | |
from rknnlite.api import RKNNLite | |
# Detections whose objectness or best class probability falls below this
# threshold are discarded (see filter_boxes).
OBJ_THRESH = 0.25
# IoU threshold used by non-maximum suppression: overlapping boxes with
# IoU above this value are suppressed (see nms_boxes).
NMS_THRESH = 0.45
def sigmoid(x):
    """Elementwise logistic sigmoid: 1 / (1 + exp(-x))."""
    z = np.exp(-x)
    return 1.0 / (1.0 + z)
def xywh2xyxy(x):
    """Convert boxes from center form [cx, cy, w, h] to corner form [x1, y1, x2, y2].

    Args:
        x: ndarray of shape (N, 4) in center form.

    Returns:
        New ndarray of shape (N, 4) in corner form; input is not modified.
    """
    out = np.copy(x)
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    out[:, 0] = x[:, 0] - half_w  # top-left x
    out[:, 1] = x[:, 1] - half_h  # top-left y
    out[:, 2] = x[:, 0] + half_w  # bottom-right x
    out[:, 3] = x[:, 1] + half_h  # bottom-right y
    return out
def process(input, mask, anchors, input_size = 640):
    """Decode one raw YOLOv5 head output into candidate boxes.

    Args:
        input: ndarray of shape (grid_h, grid_w, n_anchors, 5 + n_classes),
            raw (pre-sigmoid) network output for one detection head.
        mask: anchor indices belonging to this head (e.g. [0, 1, 2]).
        anchors: full list of [w, h] anchor pairs for all heads.
        input_size: network input resolution in pixels; the grid stride is
            derived from it.

    Returns:
        (box, box_confidence, box_class_probs) where box holds
        [cx, cy, w, h] in input-image pixel coordinates.
    """
    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])

    # Objectness score and per-class probabilities.
    box_confidence = sigmoid(input[..., 4])
    box_confidence = np.expand_dims(box_confidence, axis=-1)
    box_class_probs = sigmoid(input[..., 5:])

    # YOLOv5 v5+ decoding: xy offset in [-0.5, 1.5] relative to the cell.
    box_xy = sigmoid(input[..., :2]) * 2 - 0.5

    # Build the per-cell coordinate grid. Fix: the original tiled columns
    # by grid_w and rows by grid_h, which is only correct when the grid is
    # square; tile counts now use the opposite dimension so non-square
    # grids decode correctly too (square grids are unchanged).
    col = np.tile(np.arange(0, grid_w), grid_h).reshape(-1, grid_w)
    row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_w)
    # Generalized from a hard-coded 3 to the number of anchors in the mask.
    col = col.reshape(grid_h, grid_w, 1, 1).repeat(len(anchors), axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(len(anchors), axis=-2)
    grid = np.concatenate((col, row), axis=-1)

    box_xy += grid
    box_xy *= int(input_size / grid_h)  # stride: pixels per grid cell

    # wh decoding: (2 * sigmoid)^2 scaled by the anchor dimensions.
    box_wh = pow(sigmoid(input[..., 2:4]) * 2, 2)
    box_wh = box_wh * anchors

    box = np.concatenate((box_xy, box_wh), axis=-1)
    return box, box_confidence, box_class_probs
def filter_boxes(boxes, box_confidences, box_class_probs):
    """Keep only candidates whose scores clear OBJ_THRESH.

    Note: this is a bit different from the original yolov5 post-process —
    objectness and the best class probability are each thresholded
    separately rather than their product.

    Args:
        boxes: ndarray, candidate boxes.
        box_confidences: ndarray, objectness scores.
        box_class_probs: ndarray, per-class probabilities.

    Returns:
        (boxes, classes, scores) for the surviving candidates.
    """
    n_classes = box_class_probs.shape[-1]
    boxes = boxes.reshape(-1, 4)
    box_confidences = box_confidences.reshape(-1)
    box_class_probs = box_class_probs.reshape(-1, n_classes)

    # First pass: objectness must clear the threshold.
    obj_keep = np.where(box_confidences >= OBJ_THRESH)
    boxes = boxes[obj_keep]
    box_confidences = box_confidences[obj_keep]
    box_class_probs = box_class_probs[obj_keep]

    # Second pass: the best class probability must clear it as well.
    best_scores = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)
    cls_keep = np.where(best_scores >= OBJ_THRESH)

    boxes = boxes[cls_keep]
    classes = classes[cls_keep]
    # Final score is objectness times the best class probability.
    scores = (best_scores * box_confidences)[cls_keep]
    return boxes, classes, scores
def nms_boxes(boxes, scores):
    """Greedy non-maximum suppression for one class.

    Args:
        boxes: ndarray of [x1, y1, x2, y2] boxes.
        scores: ndarray of matching confidence scores.

    Returns:
        ndarray of indices into `boxes` that survive suppression.
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    areas = widths * heights

    # Visit candidates from highest to lowest score.
    remaining = scores.argsort()[::-1]
    keep = []
    while remaining.size > 0:
        best = remaining[0]
        keep.append(best)
        rest = remaining[1:]

        # Intersection of the current best box with each remaining box.
        ix1 = np.maximum(x1[best], x1[rest])
        iy1 = np.maximum(y1[best], y1[rest])
        ix2 = np.minimum(x1[best] + widths[best], x1[rest] + widths[rest])
        iy2 = np.minimum(y1[best] + heights[best], y1[rest] + heights[rest])
        # Tiny epsilon keeps degenerate touching boxes from being treated
        # as zero-overlap (kept from the original implementation).
        iw = np.maximum(0.0, ix2 - ix1 + 0.00001)
        ih = np.maximum(0.0, iy2 - iy1 + 0.00001)
        inter = iw * ih

        # Keep only boxes whose IoU with the best box is within the limit.
        iou = inter / (areas[best] + areas[rest] - inter)
        remaining = rest[np.where(iou <= NMS_THRESH)[0]]
    return np.array(keep)
def yolov5_post_process(input_data, input_size = 640):
    """Full YOLOv5 post-process over the three detection heads.

    Args:
        input_data: list of three head outputs shaped (H, W, 3, 5+classes).
        input_size: network input resolution in pixels.

    Returns:
        (boxes, classes, scores) ndarrays with boxes in [x1, y1, x2, y2]
        form, or (None, None, None) when nothing survives thresholding.
    """
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
               [59, 119], [116, 90], [156, 198], [373, 326]]

    # Decode and threshold each head independently, then merge.
    all_boxes, all_classes, all_scores = [], [], []
    for head, mask in zip(input_data, masks):
        b, c, s = process(head, mask, anchors, input_size)
        b, c, s = filter_boxes(b, c, s)
        all_boxes.append(b)
        all_classes.append(c)
        all_scores.append(s)

    boxes = xywh2xyxy(np.concatenate(all_boxes))
    classes = np.concatenate(all_classes)
    scores = np.concatenate(all_scores)

    # Apply NMS per class so different classes never suppress each other.
    nboxes, nclasses, nscores = [], [], []
    for cls in set(classes):
        picked = np.where(classes == cls)
        b = boxes[picked]
        c = classes[picked]
        s = scores[picked]
        keep = nms_boxes(b, s)
        nboxes.append(b[keep])
        nclasses.append(c[keep])
        nscores.append(s[keep])

    if not nclasses and not nscores:
        return None, None, None
    return (np.concatenate(nboxes),
            np.concatenate(nclasses),
            np.concatenate(nscores))
def draw(image, boxes, scores, classes, labels):
    """Draw detection boxes and class labels onto `image` (in place).

    Fix: the docstring documented a nonexistent `all_classes` parameter,
    and the locals were named top/left/right/bottom while actually holding
    x1/y1/x2/y2. The drawn/printed values were already correct; names and
    docs now match what the code does.

    Args:
        image: image to annotate (modified in place).
        boxes: ndarray of [x1, y1, x2, y2] boxes.
        scores: ndarray, confidence score per box.
        classes: ndarray, class index per box.
        labels: sequence of class names indexed by class id.
    """
    for box, score, cl in zip(boxes, scores, classes):
        left, top, right, bottom = box
        print('class: {}, score: {}'.format(labels[cl], score))
        print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(left, top, right, bottom))
        left = int(left)
        top = int(top)
        right = int(right)
        bottom = int(bottom)
        cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)
        # Label goes just above the top-left corner of the box.
        cv2.putText(image, '{0} {1:.2f}'.format(labels[cl], score),
                    (left, top - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 2)
def read_cam(nn, w, h, callback, labels):
    """Capture camera frames via GStreamer and run `callback` on each.

    Args:
        nn: initialized RKNNLite instance, passed through to `callback`.
        w, h: desired crop size (network input size) in pixels.
        callback: callable(nn, rgb_image, labels) invoked once per frame.
        labels: class-name sequence passed through to `callback`.
    """
    # Native sensor limits the capture size is derived from.
    # NOTE(review): `min_h` appears to be the sensor's MAX height — verify.
    max_w = 3840
    min_h = 2160
    # Choose a capture size whose aspect lets a centered w x h crop fit.
    if max_w / w > min_h / h:
        cap_h = h
        cap_w = max_w / (min_h // h)
    else:
        cap_w = w
        cap_h = min_h / (max_w // w)
    # Round up to a multiple of 16 (16-byte alignment).
    cap_w = int((cap_w + 15) // 16 * 16)
    cap_h = int((cap_h + 15) // 16 * 16)
    print("capture size:", cap_w, cap_h)
    cap = cv2.VideoCapture(f"v4l2src device=/dev/video11 ! video/x-raw,format=NV12,width={cap_w},height={cap_h}, framerate=30/1 ! appsink")
    if cap.isOpened():
        cv2.namedWindow("demo", cv2.WINDOW_AUTOSIZE)
        while True:
            ret_val, img = cap.read()
            # Fix: the original ignored ret_val, so a failed grab passed
            # None into cvtColor and crashed; stop the loop cleanly instead.
            if not ret_val:
                print("camera read failed")
                break
            # img2 = cv2.cvtColor(img, cv2.COLOR_YUV2BGR_NV12)
            img2 = cv2.cvtColor(img, cv2.COLOR_YUV2RGB_NV12)
            # crop wxh from center of img2
            img2 = img2[(img2.shape[0] - h) // 2:(img2.shape[0] + h) // 2, (img2.shape[1] - w) // 2:(img2.shape[1] + w) // 2]
            callback(nn, img2, labels)
    else:
        print("camera open failed")
    cv2.destroyAllWindows()
def nn_init(rknn_model):
    """Load an RKNN model file and bring up the NPU runtime.

    Args:
        rknn_model: path to the .rknn model file.

    Returns:
        A ready-to-use RKNNLite instance.

    Raises:
        Exception: if loading the model or initializing the runtime fails.
    """
    lite = RKNNLite()
    if lite.load_rknn(rknn_model) != 0:
        raise Exception('Load RKNN model failed')
    # Spread inference across all three NPU cores (use NPU_CORE_0 for one).
    if lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0_1_2) != 0:
        raise Exception('Init runtime environment failed')
    return lite
def on_image(nn, img, labels):
    """Run inference on one frame, post-process the detections, display.

    Args:
        nn: initialized RKNNLite instance.
        img: RGB HWC image, already sized to the network input.
        labels: class-name sequence used when drawing detections.
    """
    # Normalization will be done automatically in the inference method.
    outs = nn.inference(inputs=[img])
    for i, out in enumerate(outs):
        print(f"output {i}: {out.shape}")
    # Reshape each of the three heads to (3, C, H, W), then transpose to
    # (H, W, 3, C) — the layout yolov5_post_process expects.
    input_data = []
    for out in outs[:3]:
        reshaped = out.reshape([3, -1] + list(out.shape[-2:]))
        input_data.append(np.transpose(reshaped, (2, 3, 0, 1)))
    boxes, classes, scores = yolov5_post_process(input_data)
    # Convert back to BGR for OpenCV display.
    img2 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    if boxes is not None:
        draw(img2, boxes, scores, classes, labels = labels)
    cv2.imshow('demo',img2)
    cv2.waitKey(1)
if __name__ == '__main__':
    # Resolution the rknn model expects at its input.
    width = 640
    height = 640

    # Exactly one argument: the path to the .rknn model file.
    if len(sys.argv) != 2:
        print("Usage:")
        print("    python yolov5_camera.py yolov5s.rknn")
        sys.exit(0)
    model = sys.argv[1]

    # COCO class names, kept verbatim (including the original's stray
    # trailing spaces — indices must match the model's training labels).
    labels = ("person", "bicycle", "car", "motorbike ", "aeroplane ", "bus ", "train", "truck ", "boat", "traffic light",
              "fire hydrant", "stop sign ", "parking meter", "bench", "bird", "cat", "dog ", "horse ", "sheep", "cow", "elephant",
              "bear", "zebra ", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
              "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife ",
              "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza ", "donut", "cake", "chair", "sofa",
              "pottedplant", "bed", "diningtable", "toilet ", "tvmonitor", "laptop ", "mouse ", "remote ", "keyboard ", "cell phone", "microwave ",
              "oven ", "toaster", "sink", "refrigerator ", "book", "clock", "vase", "scissors ", "teddy bear ", "hair drier", "toothbrush ")

    nn = nn_init(model)
    read_cam(nn, width, height, on_image, labels)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment