person_detection_webcam_tensorflow.py
import os
import cv2
import time
import argparse
import multiprocessing

import numpy as np
import tensorflow as tf

from cam_utils import FPS, WebcamVideoStream
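# cam_utils is a local helper module, not part of OpenCV or TensorFlow; a minimal
# sketch of the FPS and WebcamVideoStream classes it is assumed to provide is given
# after the script.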
from multiprocessing import Queue, Pool
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
CWD_PATH = os.getcwd()

# Path to frozen detection graph. This is the actual model that is used for the object detection.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb')
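
# NOTE: the frozen graph is assumed to have been downloaded and extracted into the
# working directory beforehand; ssd_mobilenet_v1_coco_11_06_2017 is one of the
# pre-trained tarballs from the TensorFlow detection model zoo.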

# List of the strings that are used to add the correct label to each box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'data', 'mscoco_label_map_nl.pbtxt')

NUM_CLASSES = 90

# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                            use_display_name=True)
category_index = label_map_util.create_category_index(categories)
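# For the standard COCO label map, category_index maps class IDs to display names, e.g.
#   {1: {'id': 1, 'name': 'person'}, 2: {'id': 2, 'name': 'bicycle'}, ...};
# the *_nl.pbtxt file used here presumably carries the same IDs with Dutch display names.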

def detect_objects(image_np, sess, detection_graph):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Run detection on the frame.
    (boxes, scores, classes, num_detections) = sess.run([boxes, scores, classes, num_detections],
                                                        feed_dict={image_tensor: image_np_expanded})

    boxes = np.squeeze(boxes)      # (N, 4) boxes as normalized [ymin, xmin, ymax, xmax]
    scores = np.squeeze(scores)    # (N,) confidence scores in [0, 1]
    classes = np.squeeze(classes)  # (N,) COCO class IDs (as floats)

    # Keep only detections of class 1, i.e. 'person' in the COCO label map.
    # squeeze(axis=1) keeps boxes 2-D and scores/classes 1-D even when exactly one
    # person is detected (a plain squeeze would drop the detection axis in that case).
    indices = np.argwhere(classes == 1)
    boxes = np.squeeze(boxes[indices], axis=1)
    scores = np.squeeze(scores[indices], axis=1)
    classes = np.squeeze(classes[indices], axis=1)

    #print(boxes)
    #print(scores)
    #print(classes)

    print(sum(scores > 0.66))  # counts how many persons were detected in the frame

    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        boxes,
        classes.astype(np.int32),
        scores,
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)
    return image_np

def worker(input_q, output_q):
    # Load a (frozen) TensorFlow model into memory.
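    # Each pooled worker process builds its own graph and tf.Session, since TensorFlow
    # sessions cannot be shared across processes.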
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    fps = FPS().start()
    while True:
        fps.update()
        frame = input_q.get()
        output_q.put(detect_objects(frame, sess, detection_graph))

    fps.stop()
    sess.close()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-src', '--source', dest='video_source', type=int,
                        default=0, help='Device index of the camera.')
    parser.add_argument('-wd', '--width', dest='width', type=int,
                        default=480, help='Width of the frames in the video stream.')
    parser.add_argument('-ht', '--height', dest='height', type=int,
                        default=360, help='Height of the frames in the video stream.')
    parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int,
                        default=2, help='Number of workers.')
    parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int,
                        default=5, help='Size of the queue.')
    args = parser.parse_args()
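
    # Example invocation (all flags are optional; the defaults shown match the definitions above):
    #   python person_detection_webcam_tensorflow.py --source 0 --width 480 --height 360 --num-workers 2 --queue-size 5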

    logger = multiprocessing.log_to_stderr()
    # SUBDEBUG is only exposed via multiprocessing.util on Python 3
    # (the top-level multiprocessing.SUBDEBUG alias is Python 2 only).
    logger.setLevel(multiprocessing.util.SUBDEBUG)

    input_q = Queue(maxsize=args.queue_size)
    output_q = Queue(maxsize=args.queue_size)
    pool = Pool(args.num_workers, worker, (input_q, output_q))
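    # Note: with num_workers > 1, processed frames can come back through output_q out of
    # order, because the worker processes race on the shared queues.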

    #print(args.video_source)
    #print(args.width)
    #print(args.height)

    video_capture = WebcamVideoStream(src=args.video_source,
                                      width=args.width,
                                      height=args.height).start()
    fps = FPS().start()

    while True:  # fps._numFrames < 120
        frame = video_capture.read()
        input_q.put(frame)

        t = time.time()

        #print(output_q.get())
        cv2.imshow('Video', output_q.get())
        fps.update()

        print('[INFO] elapsed time: {:.2f}'.format(time.time() - t))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps.stop()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))

    pool.terminate()
    video_capture.stop()
    cv2.destroyAllWindows()
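
The cam_utils module imported at the top is not included in this gist. The sketch below is only an illustration of what it is assumed to provide, modeled on the common imutils-style helpers: an FPS counter plus a WebcamVideoStream class that grabs frames from cv2.VideoCapture on a background thread so the main loop never blocks on camera I/O. The method and attribute names (start, stop, update, read, elapsed, fps, _numFrames) are taken from how the script uses them; everything else is an assumption, not the original helper code.

# cam_utils.py -- minimal sketch, not the original helper module
import datetime
from threading import Thread

import cv2


class FPS:
    """Counts frames between start() and stop() and reports the average rate."""

    def __init__(self):
        self._start = None
        self._end = None
        self._numFrames = 0

    def start(self):
        self._start = datetime.datetime.now()
        return self

    def stop(self):
        self._end = datetime.datetime.now()

    def update(self):
        self._numFrames += 1

    def elapsed(self):
        # Seconds between start() and stop().
        return (self._end - self._start).total_seconds()

    def fps(self):
        # Average frames per second over the measured interval.
        return self._numFrames / self.elapsed()


class WebcamVideoStream:
    """Reads frames from a camera on a background thread so read() never blocks."""

    def __init__(self, src=0, width=480, height=360):
        self.stream = cv2.VideoCapture(src)
        self.stream.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        self.stream.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
        (self.grabbed, self.frame) = self.stream.read()
        self.stopped = False

    def start(self):
        # Start the background thread that keeps self.frame up to date.
        Thread(target=self.update, args=()).start()
        return self

    def update(self):
        while not self.stopped:
            (self.grabbed, self.frame) = self.stream.read()
        self.stream.release()

    def read(self):
        # Return the most recently grabbed frame.
        return self.frame

    def stop(self):
        self.stopped = True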