Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
WebCamObjectDetectionYoloV3.py
# Bruno Capuano 2020
# display the camera feed using OpenCV
# display FPS
# load YOLO object detector trained with COCO Dataset (80 classes)
# analyze each camera frame using YoloV3 and draw every detected COCO class (press "d" to toggle detection, "q" to quit)
import numpy as np
import time
import cv2
import os
def initYoloV3():
    """Load the YOLOv3 network and prepare the drawing/inference helpers.

    Reads the module-level ``Labels``, ``configPath`` and ``weightsPath`` and
    stores its results in the module-level ``labelColors`` (one color per
    label), ``net`` (the loaded Darknet network) and ``layerNames`` (names of
    the network's unconnected output layers, as passed to ``net.forward``).
    """
    global labelColors, layerNames, net
    # random color collection for each class label; the fixed seed keeps the
    # colors identical from run to run
    np.random.seed(42)
    labelColors = np.random.randint(0, 255, size=(len(Labels), 3), dtype="uint8")
    # load model
    net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
    allLayerNames = net.getLayerNames()
    # getUnconnectedOutLayers() returns 1-based layer indices. Depending on the
    # OpenCV version the result is shaped (N, 1) or (N,), so flatten it before
    # indexing instead of assuming each entry is itself a 1-element array.
    outLayerIds = np.array(net.getUnconnectedOutLayers()).flatten()
    layerNames = [allLayerNames[int(layerId) - 1] for layerId in outLayerIds]
def analyzeFrame(frame, displayBoundingBox = True, displayClassName = True, displayConfidence = True):
    """Run YOLOv3 on ``frame`` and draw the surviving detections onto it in place.

    Parameters:
        frame: image array to analyze; it is modified in place by the drawing calls.
        displayBoundingBox: draw a rectangle around each detection.
        displayClassName: draw the class label above each detection.
        displayConfidence: append the confidence to the label (only has an
            effect when ``displayClassName`` is also true).

    Uses the module-level ``net``, ``layerNames``, ``labelColors``, ``Labels``,
    ``confidenceDef`` and ``thresholdDef``; initializes the network on first
    use via ``initYoloV3()``. Updates the module-level ``H`` and ``W`` with the
    frame size. Returns ``None``.
    """
    global H, W
    # Refresh the cached size on every call so boxes are scaled correctly even
    # if a frame with a different resolution is passed in later.
    (H, W) = frame.shape[:2]
    if net is None:
        initYoloV3()

    yoloV3ImgSize = (416, 416)
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, yoloV3ImgSize, swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(layerNames)

    boxes = []
    confidences = []
    classIDs = []
    # The first four values of a detection are scaled by the frame size to get
    # center x, center y, width and height in pixels.
    scale = np.array([W, H, W, H])
    for output in layerOutputs:
        for detection in output:
            # values from index 5 onward are the per-class scores
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]
            if confidence > confidenceDef:
                box = detection[0:4] * scale
                (centerX, centerY, width, height) = box.astype("int")
                # convert the box center to its top-left corner
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                classIDs.append(classID)

    # non-maximum suppression drops overlapping boxes for the same object
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, confidenceDef, thresholdDef)
    if len(idxs) > 0:
        # np.array(...) tolerates both array and plain-sequence return values
        for i in np.array(idxs).flatten():
            i = int(i)
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            # Compute the color up front: the label is drawn with it even when
            # the bounding box itself is disabled.
            color = [int(c) for c in labelColors[classIDs[i]]]
            if displayBoundingBox:
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            if displayClassName and displayConfidence:
                text = "{}: {:.4f}".format(Labels[classIDs[i]], confidences[i])
                cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            elif displayClassName:
                text = f"{Labels[classIDs[i]]}:"
                cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
# Camera Settings
camera_Width = 640 # 1024 # 1280 # 640
camera_Heigth = 480 # 780 # 960 # 480
# every captured frame is resized to this (width, height) before display/analysis
frameSize = (camera_Width, camera_Heigth)
# capture device index 1 -- NOTE(review): presumably a secondary/external
# camera; confirm the index for the target machine
video_capture = cv2.VideoCapture(1)
# pause after opening the device -- presumably to let the camera warm up
time.sleep(2.0)
# frame size cache used by analyzeFrame()
(W, H) = (None, None)
# YOLO Settings
weightsPath = "yolov3.weights"
configPath = "yolov3.cfg"
LabelsPath = "coco.names"
# one class label per line; read inside a context manager so the file handle
# is closed right away instead of being left open
with open(LabelsPath) as labelsFile:
    Labels = labelsFile.read().strip().split("\n")
# minimum class score for a detection to be kept (also passed to NMSBoxes)
confidenceDef = 0.5
# threshold passed to NMSBoxes for suppressing overlapping boxes
thresholdDef = 0.3
# populated lazily by initYoloV3() on the first analyzeFrame() call
net = (None)
labelColors = (None)
layerNames = (None)
# loop iteration counter (incremented by the main loop)
i = 0
# detection starts disabled; toggled with the "d" key in the main loop
detectionEnabled = False
# Main loop: grab a frame, optionally run detection, overlay the FPS, show it.
# The try/finally guarantees the camera and windows are released even if a
# frame fails to process.
try:
    while True:
        i = i + 1
        start_time = time.time()
        ret, frameOrig = video_capture.read()
        # stop cleanly when the camera delivers no frame (device missing,
        # unplugged, or stream ended) instead of crashing in cv2.resize
        if not ret or frameOrig is None:
            break
        frame = cv2.resize(frameOrig, frameSize)
        if detectionEnabled:
            analyzeFrame(frame)
        # measure once so the guard and the division use the same value
        elapsed = time.time() - start_time
        if elapsed > 0:
            fpsInfo = "FPS: " + str(1.0 / elapsed) # FPS = 1 / time to process loop
            font = cv2.FONT_HERSHEY_DUPLEX
            cv2.putText(frame, fpsInfo, (10, 20), font, 0.4, (255, 255, 255), 1)
        cv2.imshow('@elbruno - YoloV3 Object Detection', frame)
        # key controller: "d" toggles detection, "q" quits
        key = cv2.waitKey(1) & 0xFF
        if key == ord("d"):
            detectionEnabled = not detectionEnabled
        if key == ord("q"):
            break
finally:
    video_capture.release()
    cv2.destroyAllWindows()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment