Created
January 16, 2019 08:45
-
-
Save treinberger/5e8bdd41e70c1d3fdae304140d30e182 to your computer and use it in GitHub Desktop.
Tensorflow + PiCamera object detection
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######## Picamera Object Detection Using Tensorflow Classifier #########
#
# Author: Evan Juras
# Date: 4/15/18
# Description:
# This program uses a TensorFlow classifier to perform object detection.
# It loads the classifier and uses it to perform object detection on a Picamera feed.
# It draws boxes and scores around the objects of interest in each frame from
# the Picamera. It can also be used with a webcam by adding "--usbcam"
# when executing this script from the terminal.
## Some of the code is copied from Google's example at
## https://github.com/tensorflow/models/blob/master/research/object_detection/object_detection_tutorial.ipynb
## and some is copied from Dat Tran's example at
## https://github.com/datitran/object_detector_app/blob/master/object_detection_app.py
## but I changed it to make it more understandable to me.
# Import packages | |
import os | |
import cv2 | |
import numpy as np | |
from picamera.array import PiRGBArray | |
from picamera import PiCamera | |
import tensorflow as tf | |
import argparse | |
import sys | |
import time | |
from imutils.video.pivideostream import PiVideoStream | |
# Frame size requested from the camera (width, height). A smaller
# resolution gives a slightly faster framerate.
IM_WIDTH, IM_HEIGHT = 640, 480
# Choose the camera backend. The Picamera is the default; passing --usbcam
# on the command line selects a USB webcam instead.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--usbcam',
    action='store_true',
    help='Use a USB webcam instead of picamera')
args = parser.parse_args()
camera_type = 'usb' if args.usbcam else 'picamera'
# This is needed since the working directory is the object_detection folder. | |
sys.path.append('..') | |
# Import utilities
from utils import label_map_util | |
from utils import visualization_utils as vis_util | |
# Directory (inside the working directory) that holds the object detection
# model we're using.
MODEL_NAME = 'ssdlite_mobilenet_v2_coco_2018_05_09'

# All paths below are resolved against the current working directory.
CWD_PATH = os.getcwd()

# Frozen detection graph (.pb file) containing the model that is used for
# object detection.
PATH_TO_CKPT = os.path.join(
    CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb')

# Label map file.
PATH_TO_LABELS = os.path.join(
    CWD_PATH, 'data', 'mscoco_label_map.pbtxt')

# Number of classes the object detector can identify; passed as
# max_num_classes when the label map is converted to categories.
# NOTE(review): the label map file name suggests the full COCO label map,
# which has more than 13 entries -- confirm that 13 is the intended cap.
NUM_CLASSES = 13
## Load the label map.
# A label map translates the integer class ids predicted by the network
# into category names, e.g. a prediction of `5` corresponds to `airplane`.
# The project's own utility functions are used here, but anything that
# returns a dictionary mapping integers to string labels would work.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# Load the Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    # Read the serialized frozen graph from disk first ...
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()

    # ... then parse it and import its nodes into detection_graph.
    # name='' keeps the tensors reachable under their unprefixed names.
    od_graph_def = tf.GraphDef()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

    # Session that runs operations of the detection graph.
    sess = tf.Session(graph=detection_graph)
# Look up the input and output tensors (i.e. data) of the object detection
# classifier by name.

# Input: the image.
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

# Outputs, in this order:
#   detection_boxes   - each box is a part of the image where a particular
#                       object was detected
#   detection_scores  - level of confidence for each detected object; shown
#                       on the result image together with the class label
#   detection_classes - class of each detected object
#   num_detections    - number of objects detected
(detection_boxes,
 detection_scores,
 detection_classes,
 num_detections) = (
    detection_graph.get_tensor_by_name(tensor_name)
    for tensor_name in (
        'detection_boxes:0',
        'detection_scores:0',
        'detection_classes:0',
        'num_detections:0',
    )
)
# Initialize frame rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()
font = cv2.FONT_HERSHEY_SIMPLEX


def _detect_and_draw(frame, min_score_thresh, fps):
    """Run the detector on ``frame`` and draw the results on it in place.

    Args:
        frame: Image array from the camera. It is passed to the model with a
            leading batch axis added, then annotated in place.
        min_score_thresh: Score threshold handed to the visualization
            helper; detections scoring below it are not drawn.
        fps: Frame-rate estimate rendered as a text overlay.
    """
    # The model is fed a batch of images, so add a leading batch axis.
    # NOTE(review): both camera paths appear to deliver BGR frames, while
    # the detection models are usually fed RGB -- confirm whether a
    # cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) is wanted before this line.
    frame_expanded = np.expand_dims(frame, axis=0)

    # Perform the actual detection by running the model with the image as
    # input. The detection count is fetched but not needed for drawing.
    boxes, scores, classes, _num = sess.run(
        [detection_boxes, detection_scores, detection_classes,
         num_detections],
        feed_dict={image_tensor: frame_expanded})

    # Draw the results of the detection (aka 'visualize the results').
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=min_score_thresh)

    cv2.putText(frame, "FPS: {0:.2f}".format(fps), (30, 50), font, 1,
                (255, 255, 0), 2, cv2.LINE_AA)


# Initialize camera and perform object detection.
# The camera has to be set up, read and shut down differently depending on
# whether it is a Picamera or a USB webcam; the per-frame detection and
# drawing is shared through _detect_and_draw().

### Picamera ###
if camera_type == 'picamera':
    vs = PiVideoStream((IM_WIDTH, IM_HEIGHT)).start()
    try:
        # Presumably gives the camera time to warm up before the first read.
        time.sleep(2)

        while True:
            # Work on a copy so the annotations are drawn on our own array
            # rather than on the one returned by the stream.
            frame = vs.read().copy()

            t1 = cv2.getTickCount()

            _detect_and_draw(frame, 0.40, frame_rate_calc)

            # All the results have been drawn on the frame, so it's time to
            # display it.
            cv2.imshow('Object detector', frame)

            t2 = cv2.getTickCount()
            time1 = (t2 - t1) / freq
            frame_rate_calc = 1 / time1

            # Press 'q' to quit
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # The stream object is `vs`; the original called close() on an
        # undefined `camera` name here, which raised NameError on quit.
        vs.stop()

### USB webcam ###
elif camera_type == 'usb':
    # Initialize USB webcam feed
    camera = cv2.VideoCapture(0)
    try:
        camera.set(cv2.CAP_PROP_FRAME_WIDTH, IM_WIDTH)
        camera.set(cv2.CAP_PROP_FRAME_HEIGHT, IM_HEIGHT)

        while True:
            t1 = cv2.getTickCount()

            # Acquire a frame; stop cleanly if the camera did not deliver
            # one instead of feeding an invalid frame to the model.
            ret, frame = camera.read()
            if not ret:
                sys.stderr.write(
                    'Failed to read a frame from the USB webcam; stopping.\n')
                break

            _detect_and_draw(frame, 0.85, frame_rate_calc)

            # All the results have been drawn on the frame, so it's time to
            # display it.
            cv2.imshow('Object detector', frame)

            t2 = cv2.getTickCount()
            time1 = (t2 - t1) / freq
            frame_rate_calc = 1 / time1

            # Press 'q' to quit
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Release the device even if the loop exits through an exception.
        camera.release()

cv2.destroyAllWindows()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment