# a2chub/raspi4_edgetpu_py37_object_detection.py

## raspi4_edgetpu_py37_object_detection.py
"""A demo which runs object detection on OpenCV frames."""

import argparse
import cv2
import logging
import os

import edgetpu.detection.engine

# Text overlay settings shared by the per-detection captions and the
# inference-time status line.
FONT = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 1.0
FONT_THICKNESS = 2
# Colours are (B, G, R): drawing happens on the captured frame, which is only
# converted to RGB for the copy handed to the model.
FONT_COLOR = (0, 0, 255)  # red
# Bounding-box outline settings.
BOX_THICKNESS = 2
BOX_COLOR = (255, 0, 0)  # blue

def labels(path='models/coco_labels.txt'):
    """Yield ``(label_id, label_name)`` pairs parsed from a label file.

    Every non-blank line is expected to hold an integer id, some whitespace,
    and then the label text (which may itself contain spaces).

    Args:
        path: Label file to read. Defaults to the COCO label file this demo
            has always loaded, so existing zero-argument calls are unchanged.

    Yields:
        ``(int, str)`` tuples; the name is ``''`` for a line holding only an
        id.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the first field of a non-blank line is not an integer.
    """
    with open(path) as f:
        for line in f:
            fields = line.split(None, 1)
            # A newline-terminated file (or any blank line) used to reach
            # int('') and abort the whole load with ValueError; skip those.
            if not fields:
                continue
            name = fields[1].strip() if len(fields) > 1 else ''
            yield int(fields[0]), name
# Build the id -> name lookup once at import time. NOTE: this rebinds the
# module-level name ``labels`` from the generator function above to the
# resulting dict, so the function cannot be called again afterwards.
labels = dict(labels())
# Echo the loaded table on start-up.
print(labels)


def main():
    """Run live object detection on camera frames and display the results.

    Parses the required ``--model`` argument (path to a .tflite detection
    model), opens camera index 1 requesting 640x480 frames, and then loops:
    read a frame, run it through the Edge TPU detection engine, draw a box
    and a ``label:score`` caption for each detection, overlay the inference
    time, and show the frame in a window named ``Video``.

    The loop ends when a frame cannot be read or ``q`` is pressed. The camera
    and all windows are released on every exit path, including exceptions.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path.', required=True)
    args = parser.parse_args()

    video_capture = cv2.VideoCapture(1)
    # try/finally: without it the camera stayed claimed whenever model
    # loading or inference raised.
    try:
        if not video_capture.isOpened():
            # Previously the loop was skipped and the program exited with no
            # indication of what went wrong.
            logging.error('could not open video capture device 1')
            return

        video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

        engine = edgetpu.detection.engine.DetectionEngine(args.model)
        # The input tensor shape is documented as [1, height, width, channels]
        # while cv2.resize takes (width, height); the original passed the two
        # in tensor order, which is only right for square model inputs.
        input_h, input_w = (
            int(dim) for dim in engine.get_input_tensor_shape()[1:3])
        input_tensor_size = (input_w, input_h)

        while video_capture.isOpened():
            # Capture frame-by-frame.
            ret, frame = video_capture.read()
            if not ret:
                logging.error('video capture failed')
                break
            # Append 160 black rows below the image; with the requested
            # 640x480 capture that gives a 640x640 frame.
            # NOTE(review): presumably this keeps the aspect ratio when
            # resizing to a square model input -- confirm the camera really
            # delivers 640x480.
            frame = cv2.copyMakeBorder(frame, 0, 160, 0, 0,
                                       cv2.BORDER_CONSTANT, value=[0, 0, 0])
            # The frame size is fixed for this iteration, so read it once
            # rather than once per detection.
            frame_h, frame_w, _ = frame.shape

            # Convert to tensor input format.
            input_tensor = cv2.cvtColor(cv2.resize(frame, input_tensor_size),
                                        cv2.COLOR_BGR2RGB).flatten()
            # Run Inference.
            result = engine.DetectWithInputTensor(input_tensor,
                                                  threshold=0.5, top_k=10)
            # Process results (an empty or missing result draws nothing).
            for obj in result or ():
                # Scale the box by the frame size to get pixel coordinates;
                # tolist() yields plain ints for the OpenCV drawing calls.
                x0, y0, x1, y1 = (
                    obj.bounding_box.flatten()
                    * [frame_w, frame_h, frame_w, frame_h]
                ).astype(int).tolist()
                cv2.rectangle(frame, (x0, y0), (x1, y1),
                              BOX_COLOR, BOX_THICKNESS)
                # Fall back to the numeric id for labels missing from the table.
                label = labels.get(obj.label_id, str(obj.label_id))
                cv2.putText(frame, '%s:%.2f' % (label, obj.score), (x0, y0),
                            FONT, FONT_SCALE, FONT_COLOR, FONT_THICKNESS)

            # Display inference time.
            text = '(q to exit) inference: {:>7.3f}ms'.format(
                engine.get_inference_time())
            size, _ = cv2.getTextSize(text, FONT, FONT_SCALE, FONT_THICKNESS)
            # Use the text height as the baseline so the line sits just
            # inside the top edge of the frame.
            cv2.putText(frame, text, (0, size[1]),
                        FONT, FONT_SCALE, FONT_COLOR, FONT_THICKNESS)

            # Display the resulting frame
            cv2.imshow('Video', frame)

            # Wait for keypress.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture
        video_capture.release()
        cv2.destroyAllWindows()


# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
	"""A demo which runs object detection on OpenCV frames."""

	import argparse
	import cv2
	import logging
	import os

	import edgetpu.detection.engine

	# Text overlay settings shared by the per-detection captions and the
	# inference-time status line.
	FONT = cv2.FONT_HERSHEY_SIMPLEX
	FONT_SCALE = 1.0
	FONT_THICKNESS = 2
	# Colours are (B, G, R): drawing happens on the captured frame, which is
	# only converted to RGB for the copy handed to the model.
	FONT_COLOR = (0, 0, 255)  # red
	# Bounding-box outline settings.
	BOX_THICKNESS = 2
	BOX_COLOR = (255, 0, 0)  # blue

	def labels(path='models/coco_labels.txt'):
	    """Yield ``(label_id, label_name)`` pairs parsed from a label file.

	    Every non-blank line is expected to hold an integer id, some
	    whitespace, and then the label text (which may itself contain spaces).

	    Args:
	        path: Label file to read. Defaults to the COCO label file this
	            demo has always loaded, so zero-argument calls are unchanged.

	    Yields:
	        ``(int, str)`` tuples; the name is ``''`` for a line holding only
	        an id.

	    Raises:
	        OSError: If the file cannot be opened.
	        ValueError: If the first field of a non-blank line is not an
	            integer.
	    """
	    with open(path) as f:
	        for line in f:
	            fields = line.split(None, 1)
	            # A newline-terminated file (or any blank line) used to reach
	            # int('') and abort the whole load with ValueError; skip those.
	            if not fields:
	                continue
	            name = fields[1].strip() if len(fields) > 1 else ''
	            yield int(fields[0]), name
	# Build the id -> name lookup once at import time. NOTE: this rebinds the
	# name ``labels`` from the generator function above to the resulting dict.
	labels = dict(labels())
	# Echo the loaded table on start-up.
	print(labels)


	def main():
	    """Run live object detection on camera frames and display the results.

	    Parses the required ``--model`` argument (path to a .tflite detection
	    model), opens camera index 1 requesting 640x480 frames, and then
	    loops: read a frame, run it through the Edge TPU detection engine,
	    draw a box and a ``label:score`` caption for each detection, overlay
	    the inference time, and show the frame in a window named ``Video``.

	    The loop ends when a frame cannot be read or ``q`` is pressed. The
	    camera and all windows are released on every exit path, including
	    exceptions.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument('--model', help='.tflite model path.', required=True)
	    args = parser.parse_args()

	    video_capture = cv2.VideoCapture(1)
	    # try/finally: without it the camera stayed claimed whenever model
	    # loading or inference raised.
	    try:
	        if not video_capture.isOpened():
	            # Previously the loop was skipped and the program exited with
	            # no indication of what went wrong.
	            logging.error('could not open video capture device 1')
	            return

	        video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
	        video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

	        engine = edgetpu.detection.engine.DetectionEngine(args.model)
	        # The input tensor shape is documented as
	        # [1, height, width, channels] while cv2.resize takes
	        # (width, height); the original passed the two in tensor order,
	        # which is only right for square model inputs.
	        input_h, input_w = (
	            int(dim) for dim in engine.get_input_tensor_shape()[1:3])
	        input_tensor_size = (input_w, input_h)

	        while video_capture.isOpened():
	            # Capture frame-by-frame.
	            ret, frame = video_capture.read()
	            if not ret:
	                logging.error('video capture failed')
	                break
	            # Append 160 black rows below the image; with the requested
	            # 640x480 capture that gives a 640x640 frame.
	            # NOTE(review): presumably this keeps the aspect ratio when
	            # resizing to a square model input -- confirm the camera
	            # really delivers 640x480.
	            frame = cv2.copyMakeBorder(frame, 0, 160, 0, 0,
	                                       cv2.BORDER_CONSTANT, value=[0, 0, 0])
	            # The frame size is fixed for this iteration, so read it once
	            # rather than once per detection.
	            frame_h, frame_w, _ = frame.shape

	            # Convert to tensor input format.
	            input_tensor = cv2.cvtColor(cv2.resize(frame, input_tensor_size),
	                                        cv2.COLOR_BGR2RGB).flatten()
	            # Run Inference.
	            result = engine.DetectWithInputTensor(input_tensor,
	                                                  threshold=0.5, top_k=10)
	            # Process results (an empty or missing result draws nothing).
	            for obj in result or ():
	                # Scale the box by the frame size to get pixel
	                # coordinates; tolist() yields plain ints for the OpenCV
	                # drawing calls.
	                x0, y0, x1, y1 = (
	                    obj.bounding_box.flatten()
	                    * [frame_w, frame_h, frame_w, frame_h]
	                ).astype(int).tolist()
	                cv2.rectangle(frame, (x0, y0), (x1, y1),
	                              BOX_COLOR, BOX_THICKNESS)
	                # Fall back to the numeric id for labels missing from the
	                # table.
	                label = labels.get(obj.label_id, str(obj.label_id))
	                cv2.putText(frame, '%s:%.2f' % (label, obj.score), (x0, y0),
	                            FONT, FONT_SCALE, FONT_COLOR, FONT_THICKNESS)

	            # Display inference time.
	            text = '(q to exit) inference: {:>7.3f}ms'.format(
	                engine.get_inference_time())
	            size, _ = cv2.getTextSize(text, FONT, FONT_SCALE, FONT_THICKNESS)
	            # Use the text height as the baseline so the line sits just
	            # inside the top edge of the frame.
	            cv2.putText(frame, text, (0, size[1]),
	                        FONT, FONT_SCALE, FONT_COLOR, FONT_THICKNESS)

	            # Display the resulting frame
	            cv2.imshow('Video', frame)

	            # Wait for keypress.
	            if cv2.waitKey(1) & 0xFF == ord('q'):
	                break
	    finally:
	        # Release the capture
	        video_capture.release()
	        cv2.destroyAllWindows()


	# Run the demo only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()