Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Update for NCSDK 2.0 based on code from Adrian Rosebrock's tutorial on (see link in comments)
# Updated for NCSDK 2.0. Original code by Adrian Rosebrock
# python --graph graphs/mobilenetgraph --display 1
# python --graph graphs/mobilenetgraph --confidence 0.5 --display 1
# import the necessary packages
from mvnc import mvncapi as mvnc
from import VideoStream
from import FPS
import argparse
import numpy as np
import time
import cv2
# initialize the list of class labels our network was trained to
# detect, then generate a set of bounding box colors for each class
CLASSES = ("background", "aeroplane", "bicycle", "bird",
"boat", "bottle", "bus", "car", "cat", "chair", "cow",
"diningtable", "dog", "horse", "motorbike", "person",
"pottedplant", "sheep", "sofa", "train", "tvmonitor")
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
# frame dimensions should be sqaure
PREPROCESS_DIMS = (300, 300)
DISPLAY_DIMS = (900, 900)
# calculate the multiplier needed to scale the bounding boxes
def preprocess_image(input_image):
# preprocess the image
preprocessed = cv2.resize(input_image, PREPROCESS_DIMS)
preprocessed = preprocessed - 127.5
preprocessed = preprocessed * 0.007843
# [INFO] changed this to FP32 (np.float32) instead of FP16 because that is the NCS default
# might want to change back and set the graph initialation to FP16 instead
preprocessed = preprocessed.astype(np.float32)
# return the image to the calling function
return preprocessed
def predict(image, graph):
# preprocess the image
image = preprocess_image(image)
# send the image to the NCS and run a forward pass to grab the
# network predictions
# [INFO v1.0] graph.LoadTensor(image, None)
graph.queue_inference_with_fifo_elem(input_fifo, output_fifo, image, 'user object')
#[INFO v1.0] (output, _) = graph.GetResult()
(output, _) = output_fifo.read_elem()
# grab the number of valid object predictions from the output,
# then initialize the list of predictions
# [INFO] v2.0 needed this to be explicitly set to type int()
num_valid_boxes = int(output[0])
predictions = []
# loop over results
for box_index in range(num_valid_boxes):
# calculate the base index into our array so we can extract
# bounding box information
base_index = 7 + box_index * 7
# boxes with non-finite (inf, nan, etc) numbers must be ignored
if (not np.isfinite(output[base_index]) or
not np.isfinite(output[base_index + 1]) or
not np.isfinite(output[base_index + 2]) or
not np.isfinite(output[base_index + 3]) or
not np.isfinite(output[base_index + 4]) or
not np.isfinite(output[base_index + 5]) or
not np.isfinite(output[base_index + 6])):
# extract the image width and height and clip the boxes to the
# image size in case network returns boxes outside of the image
# boundaries
(h, w) = image.shape[:2]
x1 = max(0, int(output[base_index + 3] * w))
y1 = max(0, int(output[base_index + 4] * h))
x2 = min(w, int(output[base_index + 5] * w))
y2 = min(h, int(output[base_index + 6] * h))
# grab the prediction class label, confidence (i.e., probability),
# and bounding box (x, y)-coordinates
pred_class = int(output[base_index + 1])
pred_conf = output[base_index + 2]
pred_boxpts = ((x1, y1), (x2, y2))
# create prediciton tuple and append the prediction to the
# predictions list
prediction = (pred_class, pred_conf, pred_boxpts)
# return the list of predictions to the calling function
return predictions
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-g", "--graph", required=True,
help="path to input graph file")
ap.add_argument("-c", "--confidence", default=.5,
help="confidence threshold")
ap.add_argument("-d", "--display", type=int, default=0,
help="switch to display image on screen")
args = vars(ap.parse_args())
# grab a list of all NCS devices plugged in to USB
print("[INFO] finding NCS devices...")
#[INFO v1.0] devices = mvnc.EnumerateDevices()
devices = mvnc.enumerate_devices()
# if no devices found, exit the script
if len(devices) == 0:
print("[INFO] No devices found. Please plug in a NCS")
# use the first device since this is a simple test script
# (you'll want to modify this is using multiple NCS devices)
print("[INFO] found {} devices. device0 will be used. "
"opening device0...".format(len(devices)))
device = mvnc.Device(devices[0])
#[INFO v1.0] device.OpenDevice()
# open the CNN graph file
print("[INFO] loading the graph file into RPi memory...")
with open(args["graph"], mode="rb") as f:
graph_in_memory =
# load the graph into the NCS
print("[INFO] allocating the graph on the NCS...")
#[INFO v1.0] graph = device.AllocateGraph(graph_in_memory)
graph = mvnc.Graph('graph1')
# [INFO v2.0 requires this] Allocate the graph to the device and create
# input and output Fifos with default arguments
input_fifo, output_fifo = graph.allocate_with_fifos(device, graph_in_memory)
# open a pointer to the video stream thread and allow the buffer to
# start to fill, then start the FPS counter
print("[INFO] starting the video stream and FPS counter...")
vs = VideoStream(usePiCamera=False).start()
fps = FPS().start()
# loop over frames from the video file stream
while True:
# grab the frame from the threaded video stream
# make a copy of the frame and resize it for display/video purposes
frame =
image_for_result = frame.copy()
image_for_result = cv2.resize(image_for_result, DISPLAY_DIMS)
# use the NCS to acquire predictions
predictions = predict(frame, graph)
# loop over our predictions
for (i, pred) in enumerate(predictions):
# extract prediction data for readability
(pred_class, pred_conf, pred_boxpts) = pred
# filter out weak detections by ensuring the `confidence`
# is greater than the minimum confidence
if pred_conf > args["confidence"]:
# print prediction to terminal
print("[INFO] Prediction #{}: class={}, confidence={}, "
"boxpoints={}".format(i, CLASSES[pred_class], pred_conf,
# check if we should show the prediction data
# on the frame
if args["display"] > 0:
# build a label consisting of the predicted class and
# associated probability
label = "{}: {:.2f}%".format(CLASSES[pred_class],
pred_conf * 100)
# extract information from the prediction boxpoints
(ptA, ptB) = (pred_boxpts[0], pred_boxpts[1])
(startX, startY) = (ptA[0], ptA[1])
y = startY - 15 if startY - 15 > 15 else startY + 15
# display the rectangle and label text
cv2.rectangle(image_for_result, ptA, ptB,
COLORS[pred_class], 2)
cv2.putText(image_for_result, label, (startX, y),
cv2.FONT_HERSHEY_SIMPLEX, 1, COLORS[pred_class], 3)
# check if we should display the frame on the screen
# with prediction data (you can achieve faster FPS if you
# do not output to the screen)
if args["display"] > 0:
# display the frame to the screen
cv2.imshow("Output", image_for_result)
key = cv2.waitKey(1) & 0xFF
# if the `q` key was pressed, break from the loop
if key == ord("q"):
# update the FPS counter
# if "ctrl+c" is pressed in the terminal, break from the loop
except KeyboardInterrupt:
# if there's a problem reading a frame, break gracefully
except AttributeError:
# stop the FPS counter timer
# destroy all windows if we are displaying them
if args["display"] > 0:
# stop the video stream
# clean up the graph and device
#[INFO v1.0] graph.DeallocateGraph()
#[INFO v1.0] device.CloseDevice()
# display FPS information
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.