# berak/yolov5.py (secret gist)

## yolov5.py
#
# https://github.com/UNeedCryDear/yolov5-opencv-dnn-cpp
#
import numpy as np
import cv2

# Network input resolution in pixels; images are resized to this size.
netWidth = netHeight = 640
# Stride (input pixels per grid cell) of each output scale.
netStride = [8, 16, 32]
# Overlap threshold handed to non-maximum suppression.
nmsThreshold = 0.45
# Minimum box (objectness) score and minimum class score for a candidate.
boxThreshold = classThreshold = 0.25
# Class labels, indexed by the class id the network predicts.
className = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
    "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]


def detect(net, img):
    """Run the network on *img* and decode the candidate detections.

    Args:
        net: network object providing ``setInput``, ``forward`` and
            ``getUnconnectedOutLayersNames`` (e.g. from ``cv2.dnn.readNet``).
        img: image array; ``img.shape[0]`` / ``img.shape[1]`` are taken as
            its height / width when scaling boxes back to image pixels.

    Returns:
        A tuple ``(nms_result, classIds, boxes, confidences)``:
        ``nms_result`` is whatever ``cv2.dnn.NMSBoxes`` returns (indices into
        the three lists); ``classIds`` holds the best class index per
        candidate; ``boxes`` holds ``(left, top, width, height)`` tuples in
        *img* pixels (float left/top, truncated int width/height);
        ``confidences`` holds ``class_score * box_score`` per candidate.
    """
    # NOTE(review): the (104, 117, 123) mean is subtracted before the 1/255
    # scaling. YOLOv5 exports usually expect plain 0..1 input, so (0, 0, 0)
    # may be the intended mean -- confirm against the model before changing.
    blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (netWidth, netHeight), (104, 117, 123), True, False)
    net.setInput(blob)
    outputs = net.forward(net.getUnconnectedOutLayersNames())

    # First output, first batch item: one row per candidate, laid out as
    # [center_x, center_y, width, height, box_score, class scores...].
    # Every row is decoded, so the row count is not tied to a fixed grid size.
    rows = np.asarray(outputs[0][0])

    ratio_h = img.shape[0] / netHeight
    ratio_w = img.shape[1] / netWidth

    # Drop low-objectness rows first so the per-class work only touches the
    # survivors; float64 keeps the pixel scaling out of float32 rounding.
    rows = rows[rows[:, 4] > boxThreshold].astype(np.float64)
    class_scores = rows[:, 5:]
    best_ids = np.argmax(class_scores, axis=1)
    best_scores = class_scores[np.arange(len(rows)), best_ids]

    keep = best_scores > classThreshold
    rows = rows[keep]
    best_ids = best_ids[keep]
    best_scores = best_scores[keep]

    # Convert center/size in network pixels to a top-left box in image pixels.
    lefts = (rows[:, 0] - 0.5 * rows[:, 2]) * ratio_w
    tops = (rows[:, 1] - 0.5 * rows[:, 3]) * ratio_h
    widths = (rows[:, 2] * ratio_w).astype(int)
    heights = (rows[:, 3] * ratio_h).astype(int)

    classIds = best_ids.tolist()
    confidences = (best_scores * rows[:, 4]).tolist()
    boxes = list(zip(lefts.tolist(), tops.tolist(), widths.tolist(), heights.tolist()))

    nms_result = cv2.dnn.NMSBoxes(boxes, confidences, classThreshold, nmsThreshold)
    return nms_result, classIds, boxes, confidences


def drawPred(img, nms, classIds, boxes, confs, colors):
    """Draw the NMS-selected detections on *img* and display it.

    Args:
        img: image array that is drawn on in place.
        nms: indices (into *classIds*, *boxes*, *confs*) of the detections
            to draw, as returned by ``cv2.dnn.NMSBoxes``.
        classIds: class index per candidate.
        boxes: ``(left, top, width, height)`` per candidate.
        confs: confidence per candidate.
        colors: color table indexed by class index.

    Opens a window named "1" and blocks in ``cv2.waitKey()`` until a key
    is pressed.
    """
    # Depending on the OpenCV version NMSBoxes yields a flat index array, an
    # (N, 1) array, or an empty tuple; flatten so every index is a plain int.
    for idx in np.asarray(nms, dtype=int).reshape(-1).tolist():
        class_id = classIds[idx]
        box = boxes[idx]
        left = int(box[0])
        top = int(box[1] - 3)
        right = left + int(box[2])
        bot = top + int(box[3])
        cv2.rectangle(img, (left, top), (right, bot), colors[class_id], 2, 8)

        label = "%s:%3.3f" % (className[class_id], confs[idx])
        cv2.putText(img, label, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.4, colors[class_id], 1)

    cv2.namedWindow("1", 0)
    cv2.imshow("1", img)
    cv2.waitKey()


# Load the ONNX model.
netPath = "c:/data/dnn/yolo/yolov5s.onnx"
net = cv2.dnn.readNet(netPath)

imgPath = "C:/data/cache/23/0BE404390A1F19B41441250B60801455CEB76CF6.png"
img = cv2.imread(imgPath)
# cv2.imread reports an unreadable/missing file by returning None, not by
# raising; fail here with a clear message instead of on img.shape below.
if img is None:
    raise FileNotFoundError("could not read image: " + imgPath)

# zero pad rectangular images to larger quadratic size
maxLen = max(img.shape[0], img.shape[1])
if maxLen > 1.2 * img.shape[1] or maxLen > 1.2 * img.shape[0]:
    resizeImg = np.zeros((maxLen, maxLen, 3), np.uint8)
    # original image goes into the top-left corner; the rest stays black
    resizeImg[:img.shape[0], :img.shape[1]] = img
    img = resizeImg

# inference
nms_result, classIds, boxes, confidences = detect(net, img)

# viz: one random color per class
colors = np.random.rand(80, 3) * 255
drawPred(img, nms_result, classIds, boxes, confidences, colors)
	#
	# https://github.com/UNeedCryDear/yolov5-opencv-dnn-cpp
	#
	import numpy as np
	import cv2

	# Network input resolution in pixels; images are resized to this size.
	netWidth = netHeight = 640
	# Stride (input pixels per grid cell) of each output scale.
	netStride = [8, 16, 32]
	# Overlap threshold handed to non-maximum suppression.
	nmsThreshold = 0.45
	# Minimum box (objectness) score and minimum class score for a candidate.
	boxThreshold = classThreshold = 0.25
	# Class labels, indexed by the class id the network predicts.
	className = [
		"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
		"boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
		"dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
		"backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
		"sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
		"wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
		"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
		"chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
		"mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
		"refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
	]


	def detect(net, img):
		"""Run the network on *img* and decode the candidate detections.

		Args:
			net: network object providing ``setInput``, ``forward`` and
				``getUnconnectedOutLayersNames`` (e.g. from ``cv2.dnn.readNet``).
			img: image array; ``img.shape[0]`` / ``img.shape[1]`` are taken as
				its height / width when scaling boxes back to image pixels.

		Returns:
			A tuple ``(nms_result, classIds, boxes, confidences)``:
			``nms_result`` is whatever ``cv2.dnn.NMSBoxes`` returns (indices
			into the three lists); ``classIds`` holds the best class index per
			candidate; ``boxes`` holds ``(left, top, width, height)`` tuples in
			*img* pixels (float left/top, truncated int width/height);
			``confidences`` holds ``class_score * box_score`` per candidate.
		"""
		# NOTE(review): the (104, 117, 123) mean is subtracted before the 1/255
		# scaling. YOLOv5 exports usually expect plain 0..1 input, so (0, 0, 0)
		# may be the intended mean -- confirm against the model before changing.
		blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (netWidth, netHeight), (104, 117, 123), True, False)
		net.setInput(blob)
		outputs = net.forward(net.getUnconnectedOutLayersNames())

		# First output, first batch item: one row per candidate, laid out as
		# [center_x, center_y, width, height, box_score, class scores...].
		# Every row is decoded, so the row count is not tied to a fixed grid size.
		rows = np.asarray(outputs[0][0])

		ratio_h = img.shape[0] / netHeight
		ratio_w = img.shape[1] / netWidth

		# Drop low-objectness rows first so the per-class work only touches the
		# survivors; float64 keeps the pixel scaling out of float32 rounding.
		rows = rows[rows[:, 4] > boxThreshold].astype(np.float64)
		class_scores = rows[:, 5:]
		best_ids = np.argmax(class_scores, axis=1)
		best_scores = class_scores[np.arange(len(rows)), best_ids]

		keep = best_scores > classThreshold
		rows = rows[keep]
		best_ids = best_ids[keep]
		best_scores = best_scores[keep]

		# Convert center/size in network pixels to a top-left box in image pixels.
		lefts = (rows[:, 0] - 0.5 * rows[:, 2]) * ratio_w
		tops = (rows[:, 1] - 0.5 * rows[:, 3]) * ratio_h
		widths = (rows[:, 2] * ratio_w).astype(int)
		heights = (rows[:, 3] * ratio_h).astype(int)

		classIds = best_ids.tolist()
		confidences = (best_scores * rows[:, 4]).tolist()
		boxes = list(zip(lefts.tolist(), tops.tolist(), widths.tolist(), heights.tolist()))

		nms_result = cv2.dnn.NMSBoxes(boxes, confidences, classThreshold, nmsThreshold)
		return nms_result, classIds, boxes, confidences


	def drawPred(img, nms, classIds, boxes, confs, colors):
		"""Draw the NMS-selected detections on *img* and display it.

		Args:
			img: image array that is drawn on in place.
			nms: indices (into *classIds*, *boxes*, *confs*) of the detections
				to draw, as returned by ``cv2.dnn.NMSBoxes``.
			classIds: class index per candidate.
			boxes: ``(left, top, width, height)`` per candidate.
			confs: confidence per candidate.
			colors: color table indexed by class index.

		Opens a window named "1" and blocks in ``cv2.waitKey()`` until a key
		is pressed.
		"""
		# Depending on the OpenCV version NMSBoxes yields a flat index array, an
		# (N, 1) array, or an empty tuple; flatten so every index is a plain int.
		for idx in np.asarray(nms, dtype=int).reshape(-1).tolist():
			class_id = classIds[idx]
			box = boxes[idx]
			left = int(box[0])
			top = int(box[1] - 3)
			right = left + int(box[2])
			bot = top + int(box[3])
			cv2.rectangle(img, (left, top), (right, bot), colors[class_id], 2, 8)

			label = "%s:%3.3f" % (className[class_id], confs[idx])
			cv2.putText(img, label, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.4, colors[class_id], 1)

		cv2.namedWindow("1", 0)
		cv2.imshow("1", img)
		cv2.waitKey()



	# Load the ONNX model.
	netPath = "c:/data/dnn/yolo/yolov5s.onnx"
	net = cv2.dnn.readNet(netPath)

	imgPath = "C:/data/cache/23/0BE404390A1F19B41441250B60801455CEB76CF6.png"
	img = cv2.imread(imgPath)
	# cv2.imread reports an unreadable/missing file by returning None, not by
	# raising; fail here with a clear message instead of on img.shape below.
	if img is None:
		raise FileNotFoundError("could not read image: " + imgPath)

	# zero pad rectangular images to larger quadratic size
	maxLen = max(img.shape[0], img.shape[1])
	if maxLen > 1.2 * img.shape[1] or maxLen > 1.2 * img.shape[0]:
		resizeImg = np.zeros((maxLen, maxLen, 3), np.uint8)
		# original image goes into the top-left corner; the rest stays black
		resizeImg[:img.shape[0], :img.shape[1]] = img
		img = resizeImg

	# inference
	nms_result, classIds, boxes, confidences = detect(net, img)

	# viz: one random color per class
	colors = np.random.rand(80, 3) * 255
	drawPred(img, nms_result, classIds, boxes, confidences, colors)