-
-
Save Erol444/82ef64964185dac272dd4b238006cab7 to your computer and use it in GitHub Desktop.
Mobilenet video DepthAI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from pathlib import Path | |
import sys | |
import cv2 | |
import depthai as dai | |
import numpy as np | |
# Resolve the neural-network blob path: default to the MobileNet-SSD blob in
# the "models" directory next to this script; the first command-line argument,
# when given, overrides it.
nnPath = str((Path(__file__).parent / Path('models/mobilenet-ssd_openvino_2021.2_5shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

# Abort before any pipeline is built if the blob is missing. The message names
# the path that was checked so a mistyped command-line argument is easy to spot.
if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found ({nnPath}), please run "{sys.executable} install_requirements.py"')
# MobileNet-SSD class names; a detection's integer label is used as the index
# into this list when the overlay text is drawn.
labelMap = [
    "background",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]
# Build the pipeline and pin the OpenVINO version (2021.2, the same version
# that appears in the default blob's file name).
pipeline = dai.Pipeline()
pipeline.setOpenVINOVersion(dai.OpenVINO.Version.VERSION_2021_2)

# Nodes: one colour camera, one MobileNet detection network and two XLink
# outputs that carry the video frames and the detections off the pipeline.
camRgb = pipeline.createColorCamera()
nn = pipeline.createMobileNetDetectionNetwork()
xoutVideo = pipeline.createXLinkOut()
nnOut = pipeline.createXLinkOut()

# Stream names; the output queues are looked up by these names later on.
xoutVideo.setStreamName("video")
nnOut.setStreamName("nn")

# Camera configuration
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setPreviewSize(300, 300)  # NN input
camRgb.setPreviewKeepAspectRatio(False)
# The video output has to be 1:1 to match the NN input aspect ratio (300x300)
camRgb.setVideoSize(1080, 1080)

# Detection network configuration
nn.setBlobPath(nnPath)
nn.setConfidenceThreshold(0.5)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# Wiring: preview -> NN -> "nn" stream, and video -> "video" stream
camRgb.preview.link(nn.input)
nn.out.link(nnOut.input)
camRgb.video.link(xoutVideo.input)
# Connect to device and start pipeline
with dai.Device(pipeline) as device:
    # Output queues will be used to get the frames and nn data from the outputs defined above
    qVideo = device.getOutputQueue(name="video", maxSize=4, blocking=False)
    qDet = device.getOutputQueue(name="nn", maxSize=4, blocking=False)

    # Most recent frame / detection list; kept across loop iterations so the
    # display can be refreshed even when only one of the two queues had data.
    videoFrame = None
    detections = []

    def frameNorm(frame, bbox):
        """Convert a normalized bounding box to integer pixel coordinates.

        Args:
            frame: Image array; ``shape[0]`` is used as the height and
                ``shape[1]`` as the width.
            bbox: Sequence of coordinates, passed by the caller as
                ``(xmin, ymin, xmax, ymax)``. Values are clipped to ``[0, 1]``.

        Returns:
            Integer array of the same length as ``bbox``: even positions are
            scaled by the frame width, odd positions by the frame height.
        """
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def displayFrame(name, frame):
        """Show ``frame`` in window ``name`` with the current detections drawn on it.

        The overlay is drawn on a copy. ``videoFrame`` is cached and shown
        again on iterations where no new frame arrived, so drawing in place
        would stack boxes from successive detection results on one image.

        Args:
            name: Title of the OpenCV window.
            frame: Image to display; it is left unmodified.
        """
        color = (255, 0, 0)
        canvas = frame.copy()
        for detection in detections:
            bbox = frameNorm(canvas, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            # A blob supplied on the command line may emit class ids outside
            # labelMap; fall back to the numeric id instead of raising IndexError.
            if 0 <= detection.label < len(labelMap):
                labelText = labelMap[detection.label]
            else:
                labelText = str(detection.label)
            cv2.putText(canvas, labelText, (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.putText(canvas, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.rectangle(canvas, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        # Show the frame
        cv2.imshow(name, canvas)

    while True:
        # Instead of get (blocking), we use tryGet (nonblocking) which will return the available data or None otherwise
        inVideo = qVideo.tryGet()
        inDet = qDet.tryGet()

        if inVideo is not None:
            videoFrame = inVideo.getCvFrame()

        if inDet is not None:
            detections = inDet.detections

        if videoFrame is not None:
            displayFrame("video", videoFrame)

        # Pressing 'q' in the window ends the loop and leaves the with-block
        if cv2.waitKey(1) == ord('q'):
            break
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment