Object detection with webcam on Raspberry Pi and TensorFlow 1.9
######## WebCam Object Detection Using TensorFlow Classifier #########
# Description:
# This program uses a TensorFlow classifier to perform object detection.
# It loads the classifier and uses it to perform object detection on a
# webcam feed. It draws boxes and scores around the objects of interest
# in each frame from the webcam. It can also be used with a Picamera by
# adding "--picamera" when executing this script from the terminal.
# Import packages
import os
import cv2
import numpy as np
from picamera.array import PiRGBArray
from picamera import PiCamera
import tensorflow as tf
import argparse
import sys
import time

lastMs = int(round(time.time() * 1000))
# Set up camera constants
# IM_WIDTH = 1280
# IM_HEIGHT = 720
IM_WIDTH = 640
IM_HEIGHT = 480

# Select camera type (if the user passes --picamera when calling this
# script, a Picamera will be used instead of the default USB webcam)
camera_type = 'usb'
parser = argparse.ArgumentParser()
parser.add_argument('--picamera', help='Use a picamera instead of USB webcam',
                    action='store_true')
args = parser.parse_args()
if args.picamera:
    camera_type = 'picamera'
# This is needed since the working directory is the object_detection folder.
sys.path.append('..')

# Import utilities
from utils import label_map_util
from utils import visualization_utils as vis_util
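# (label_map_util and visualization_utils come from the TensorFlow models
# repository, under research/object_detection/utils; this script assumes it
# is run from inside that object_detection directory.)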
# Name of the directory containing the object detection model we're using
MODEL_NAME = 'ssdlite_mobilenet_v2_coco_2018_05_09'

# Grab path to current working directory
CWD_PATH = os.getcwd()

# Path to frozen detection graph .pb file, which contains the model that is
# used for object detection.
PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb')

# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH, 'data', 'mscoco_label_map.pbtxt')

# Number of classes the object detector can identify
NUM_CLASSES = 90
## Load the label map.
# Label maps map indices to category names, so that when the convolutional
# network predicts `5`, we know that this corresponds to `airplane`.
# Here we use internal utility functions, but anything that returns a
# dictionary mapping integers to appropriate string labels would be fine.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
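# For reference, category_index maps class IDs to small dicts. With the
# standard MSCOCO label map, for example, category_index[1] is
# {'id': 1, 'name': 'person'}, so a predicted class ID can be turned into a
# human-readable label with:
#   label = category_index[int(class_id)]['name']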
# Load the TensorFlow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
    sess = tf.Session(graph=detection_graph)
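# Note: tf.GraphDef, tf.gfile.GFile, and tf.Session are TensorFlow 1.x APIs
# (this script targets TF 1.9). Under TensorFlow 2.x the same calls live
# under tf.compat.v1 (e.g. tf.compat.v1.Session).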
# Define input and output tensors (i.e. data) for the object detection classifier

# Input tensor is the image
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

# Output tensors are the detection boxes, scores, and classes
# Each box represents a part of the image where a particular object was detected
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

# Each score represents the level of confidence for each of the objects.
# The score is shown on the result image, together with the class label.
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')

# Number of objects detected
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
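# Optional sanity check (a sketch, not part of the original flow): run the
# graph once on a dummy image to confirm everything is wired up before
# entering the camera loop. With a standard Object Detection API frozen
# graph, boxes come back normalized as [ymin, xmin, ymax, xmax].
#   dummy = np.zeros((1, IM_HEIGHT, IM_WIDTH, 3), dtype=np.uint8)
#   (boxes, scores, classes, num) = sess.run(
#       [detection_boxes, detection_scores, detection_classes, num_detections],
#       feed_dict={image_tensor: dummy})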
# Initialize frame rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()
font = cv2.FONT_HERSHEY_SIMPLEX

# Initialize camera and perform object detection.
# The camera has to be set up and used differently depending on whether it's
# a Picamera or a USB webcam.
# I know this is ugly, but I basically copy+pasted the code for the object
# detection loop twice, and made one work for the Picamera and the other
# work for USB.
### Picamera ###
if camera_type == 'picamera':
    # Initialize Picamera and grab reference to the raw capture
    camera = PiCamera()
    camera.resolution = (IM_WIDTH, IM_HEIGHT)
    camera.framerate = 10
    rawCapture = PiRGBArray(camera, size=(IM_WIDTH, IM_HEIGHT))
    rawCapture.truncate(0)

    for frame1 in camera.capture_continuous(rawCapture, format="bgr", use_video_port=True):
        t1 = cv2.getTickCount()

        # Acquire frame and expand frame dimensions to have shape
        # [1, None, None, 3], i.e. a batch of one image where each item
        # holds the pixel BGR values.
        frame = frame1.array
        frame.setflags(write=1)
        frame_expanded = np.expand_dims(frame, axis=0)

        # Perform the actual detection by running the model with the image as input
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: frame_expanded})

        # Draw the results of the detection (aka 'visualize the results')
        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.40)
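        # If pixel coordinates of a detection are needed (e.g. for cropping),
        # the normalized boxes can be converted like this (a sketch; index 0
        # is the highest-scoring detection):
        #   ymin, xmin, ymax, xmax = np.squeeze(boxes)[0]
        #   (left, right) = (int(xmin * IM_WIDTH), int(xmax * IM_WIDTH))
        #   (top, bottom) = (int(ymin * IM_HEIGHT), int(ymax * IM_HEIGHT))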
        cv2.putText(frame, "FPS: {0:.2f}".format(frame_rate_calc), (30, 50),
                    font, 1, (255, 255, 0), 2, cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('Object detector', frame)

        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / freq
        frame_rate_calc = 1 / time1

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break

        rawCapture.truncate(0)

    camera.close()
### USB webcam ###
elif camera_type == 'usb':
    # Initialize USB webcam feed
    camera = cv2.VideoCapture(0)
    if camera is None or not camera.isOpened():
        print('\n\n')
        print('Error - could not open video device.')
        print('\n\n')
        sys.exit(1)
    ret = camera.set(cv2.CAP_PROP_FRAME_WIDTH, IM_WIDTH)
    ret = camera.set(cv2.CAP_PROP_FRAME_HEIGHT, IM_HEIGHT)

    # Save the actual dimensions (the camera may not honor the request)
    actual_video_width = camera.get(cv2.CAP_PROP_FRAME_WIDTH)
    actual_video_height = camera.get(cv2.CAP_PROP_FRAME_HEIGHT)
    print('actual video resolution: ' + str(actual_video_width) +
          ' x ' + str(actual_video_height))
    frame_count = 0
    while True:
        t1 = cv2.getTickCount()

        # Acquire frame and expand frame dimensions to have shape
        # [1, None, None, 3], i.e. a batch of one image.
        # Discard old frames stored in the capture's internal buffer to avoid
        # lag: the buffer has a fixed size of 5 frames, so grab() is used to
        # drop the 5 stale frames, then a fresh frame is decoded with read().
        for i in range(5):
            camera.grab()
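        # (Alternative sketch: on OpenCV builds/backends that support it,
        # camera.set(cv2.CAP_PROP_BUFFERSIZE, 1) can shrink the capture
        # buffer instead of manually draining it with grab().)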
        ret, frame = camera.read()
        frame_expanded = np.expand_dims(frame, axis=0)

        # Perform the actual detection by running the model with the image as input
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: frame_expanded})

        # Draw the results of the detection (aka 'visualize the results')
        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.85)

        cv2.putText(frame, "FPS: {0:.2f} frame: {1}".format(frame_rate_calc, frame_count),
                    (30, 50), font, 1, (255, 255, 0), 2, cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('Object detector', frame)

        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / freq
        frame_rate_calc = 1 / time1

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break
        frame_count += 1

    camera.release()

cv2.destroyAllWindows()
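# Usage sketch (the file name below is a placeholder for whatever you saved
# this gist as; run from the object_detection directory with the model folder
# and data/mscoco_label_map.pbtxt in place):
#   python3 object_detection_webcam.py              # USB webcam (default)
#   python3 object_detection_webcam.py --picamera   # Raspberry Pi camera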