# karolmajek/detect-objects-cv2.py Secret

## detect-objects-cv2.py
import tensorflow as tf
import numpy as np
import cv2
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_FROZEN_GRAPH = '/content/exported_model/frozen_inference_graph.pb'

# Label map file used to add the correct label to each box.
PATH_TO_LABELS = '/content/models/research/object_detection/data/pet_label_map.pbtxt'

# Video source: 0 -> first webcam; 1 -> second webcam; a filename -> predict using that file.
VIDEO_FILE = 0

# When True, each annotated frame is displayed in an OpenCV window.
# Must be the Python literal `True`; the bare name `TRUE` is undefined and
# raises NameError as soon as the script starts.
SHOW_WINDOW = True

# Build a dedicated graph and fill it with the ops stored in the frozen model file.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    # Read the serialized GraphDef bytes; the file handle is only needed for this.
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    # Import with an empty name so tensors can be fetched by their plain
    # names (the script later looks up e.g. 'image_tensor:0').
    tf.import_graph_def(od_graph_def, name='')

# Category lookup built from the label map; passed to the visualization helper.
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS)

# Open the configured source (webcam index or file name) instead of a
# hard-coded device 0, so the VIDEO_FILE setting actually takes effect.
cap = cv2.VideoCapture(VIDEO_FILE)
counter = 0  # frames processed so far (useful for numbering saved frames)

try:
    with detection_graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors; only the outputs that
            # the loaded graph really exposes end up in tensor_dict.
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                        tensor_name)

            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

            # Stays 0 until the mask post-processing ops have been added.
            counter_run_once = 0

            while True:
                ret, frame = cap.read()
                if not ret:
                    # No frame was delivered: stop processing.
                    break

                if 'detection_masks' in tensor_dict and counter_run_once == 0:
                    counter_run_once = 1
                    # Added once, on the first frame, because the reframing op
                    # needs the frame height and width.
                    # The following processing is only for single image
                    detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                    detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                    # Reframe is required to translate mask from box coordinates
                    # to image coordinates and fit the image size.
                    real_num_detection = tf.cast(
                        tensor_dict['num_detections'][0], tf.int32)
                    detection_boxes = tf.slice(
                        detection_boxes, [0, 0], [real_num_detection, -1])
                    detection_masks = tf.slice(
                        detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                        detection_masks, detection_boxes, frame.shape[0], frame.shape[1])
                    detection_masks_reframed = tf.cast(
                        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                    # Follow the convention by adding back the batch dimension
                    tensor_dict['detection_masks'] = tf.expand_dims(
                        detection_masks_reframed, 0)

                # OpenCV delivers frames in BGR channel order; the detection
                # graph presumably expects RGB (Object Detection API
                # convention) -- convert a copy for inference and keep the
                # BGR frame for drawing and display.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Run inference on a batch containing just this frame.
                output_dict = sess.run(
                    tensor_dict,
                    feed_dict={image_tensor: np.expand_dims(frame_rgb, 0)})

                # all outputs are float32 numpy arrays, so convert types as appropriate
                output_dict['num_detections'] = int(output_dict['num_detections'][0])
                output_dict['detection_classes'] = output_dict[
                    'detection_classes'][0].astype(np.uint8)
                output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
                output_dict['detection_scores'] = output_dict['detection_scores'][0]
                if 'detection_masks' in output_dict:
                    output_dict['detection_masks'] = output_dict['detection_masks'][0]

                # Draw the detections exactly once per frame. Previously the
                # overlay was drawn a second time whenever masks were present.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    frame,
                    output_dict['detection_boxes'],
                    output_dict['detection_classes'],
                    output_dict['detection_scores'],
                    category_index,
                    instance_masks=output_dict.get('detection_masks'),
                    use_normalized_coordinates=True,
                    line_thickness=8,
                    min_score_thresh=0.1)
                # To save annotated frames, enable:
                #   cv2.imwrite('img%08d.jpg' % counter, frame)

                if SHOW_WINDOW:
                    cv2.imshow('Result', frame)
                    cv2.waitKey(10)
                counter = counter + 1
finally:
    # Always free the capture device, even on an error or Ctrl+C.
    cap.release()
    if SHOW_WINDOW:
        cv2.destroyAllWindows()
	import tensorflow as tf
	import numpy as np
	import cv2
	from object_detection.utils import ops as utils_ops
	from object_detection.utils import label_map_util
	from object_detection.utils import visualization_utils as vis_util

	# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_FROZEN_GRAPH = '/content/exported_model/frozen_inference_graph.pb'

# Label map file used to add the correct label to each box.
PATH_TO_LABELS = '/content/models/research/object_detection/data/pet_label_map.pbtxt'

# Video source: 0 -> first webcam; 1 -> second webcam; a filename -> predict using that file.
VIDEO_FILE = 0

# When True, each annotated frame is displayed in an OpenCV window.
# Must be the Python literal `True`; the bare name `TRUE` is undefined and
# raises NameError as soon as the script starts.
SHOW_WINDOW = True

	# Load the frozen detection graph (nesting of the `with` blocks restored; it had been flattened).
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    # Read the serialized GraphDef bytes; the file handle is only needed for this.
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    # Import with an empty name so tensors can be fetched by their plain
    # names (the script later looks up e.g. 'image_tensor:0').
    tf.import_graph_def(od_graph_def, name='')

# Category lookup built from the label map; passed to the visualization helper.
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS)

	# Capture and inference loop (block nesting restored; it had been flattened to one tab per line).
# Open the configured source (webcam index or file name) instead of a
# hard-coded device 0, so the VIDEO_FILE setting actually takes effect.
cap = cv2.VideoCapture(VIDEO_FILE)
counter = 0  # frames processed so far (useful for numbering saved frames)

try:
    with detection_graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors; only the outputs that
            # the loaded graph really exposes end up in tensor_dict.
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                        tensor_name)

            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

            # Stays 0 until the mask post-processing ops have been added.
            counter_run_once = 0

            while True:
                ret, frame = cap.read()
                if not ret:
                    # No frame was delivered: stop processing.
                    break

                if 'detection_masks' in tensor_dict and counter_run_once == 0:
                    counter_run_once = 1
                    # Added once, on the first frame, because the reframing op
                    # needs the frame height and width.
                    # The following processing is only for single image
                    detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                    detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                    # Reframe is required to translate mask from box coordinates
                    # to image coordinates and fit the image size.
                    real_num_detection = tf.cast(
                        tensor_dict['num_detections'][0], tf.int32)
                    detection_boxes = tf.slice(
                        detection_boxes, [0, 0], [real_num_detection, -1])
                    detection_masks = tf.slice(
                        detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                        detection_masks, detection_boxes, frame.shape[0], frame.shape[1])
                    detection_masks_reframed = tf.cast(
                        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                    # Follow the convention by adding back the batch dimension
                    tensor_dict['detection_masks'] = tf.expand_dims(
                        detection_masks_reframed, 0)

                # OpenCV delivers frames in BGR channel order; the detection
                # graph presumably expects RGB (Object Detection API
                # convention) -- convert a copy for inference and keep the
                # BGR frame for drawing and display.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Run inference on a batch containing just this frame.
                output_dict = sess.run(
                    tensor_dict,
                    feed_dict={image_tensor: np.expand_dims(frame_rgb, 0)})

                # all outputs are float32 numpy arrays, so convert types as appropriate
                output_dict['num_detections'] = int(output_dict['num_detections'][0])
                output_dict['detection_classes'] = output_dict[
                    'detection_classes'][0].astype(np.uint8)
                output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
                output_dict['detection_scores'] = output_dict['detection_scores'][0]
                if 'detection_masks' in output_dict:
                    output_dict['detection_masks'] = output_dict['detection_masks'][0]

                # Draw the detections exactly once per frame. Previously the
                # overlay was drawn a second time whenever masks were present.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    frame,
                    output_dict['detection_boxes'],
                    output_dict['detection_classes'],
                    output_dict['detection_scores'],
                    category_index,
                    instance_masks=output_dict.get('detection_masks'),
                    use_normalized_coordinates=True,
                    line_thickness=8,
                    min_score_thresh=0.1)
                # To save annotated frames, enable:
                #   cv2.imwrite('img%08d.jpg' % counter, frame)

                if SHOW_WINDOW:
                    cv2.imshow('Result', frame)
                    cv2.waitKey(10)
                counter = counter + 1
finally:
    # Always free the capture device, even on an error or Ctrl+C.
    cap.release()
    if SHOW_WINDOW:
        cv2.destroyAllWindows()