Object detection using MobileNet SSD with TensorFlow Lite (with and without Edge TPU)
# -*- coding: utf-8 -*-
import cv2
import tensorflow as tf
import numpy as np
# https://www.tensorflow.org/lite/guide/hosted_models
# http://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip
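# To fetch the model (a sketch; assumes wget/unzip are available and that the
# zip contents match the hosted-models page, i.e. detect.tflite + labelmap.txt):
#   wget http://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip
#   unzip coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip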
def detect_from_camera():
    # Load model
    interpreter = tf.lite.Interpreter(model_path="detect.tflite")
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    cap = cv2.VideoCapture(0)  # 0 is the camera device number

    while True:
        # Capture image
        ret, img_org = cap.read()
        # cv2.imshow('image', img_org)
        key = cv2.waitKey(1)
        if key == 27:  # ESC
            break

        # Prepare input image
        img = cv2.cvtColor(img_org, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (300, 300))
        img = img.reshape(1, img.shape[0], img.shape[1], img.shape[2])  # (1, 300, 300, 3)
        img = img.astype(np.uint8)

        # Set input tensor
        interpreter.set_tensor(input_details[0]['index'], img)

        # Run inference
        interpreter.invoke()

        # Get output tensors
        boxes = interpreter.get_tensor(output_details[0]['index'])
        labels = interpreter.get_tensor(output_details[1]['index'])
        scores = interpreter.get_tensor(output_details[2]['index'])
        num = interpreter.get_tensor(output_details[3]['index'])

        for i in range(boxes.shape[1]):
            if scores[0, i] > 0.5:
                # Boxes are [ymin, xmin, ymax, xmax] in relative coordinates
                box = boxes[0, i, :]
                x0 = int(box[1] * img_org.shape[1])
                y0 = int(box[0] * img_org.shape[0])
                x1 = int(box[3] * img_org.shape[1])
                y1 = int(box[2] * img_org.shape[0])
                cv2.rectangle(img_org, (x0, y0), (x1, y1), (255, 0, 0), 2)
                cv2.rectangle(img_org, (x0, y0), (x0 + 100, y0 - 30), (255, 0, 0), -1)
                cv2.putText(img_org,
                            str(int(labels[0, i])),
                            (x0, y0),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            1,
                            (255, 255, 255),
                            2)

        # cv2.imwrite('output.jpg', img_org)
        cv2.imshow('image', img_org)

    cap.release()
    cv2.destroyAllWindows()
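# Note: detect.tflite here is the uint8-quantized model, so the raw 0-255 RGB
# pixels are fed directly with no mean/std normalization; a float model would
# instead need a normalized float32 input tensor.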
def detect_from_image():
    # Prepare input image
    img_org = cv2.imread('input.jpg')
    # cv2.imshow('image', img_org)
    img = cv2.cvtColor(img_org, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (300, 300))
    img = img.reshape(1, img.shape[0], img.shape[1], img.shape[2])  # (1, 300, 300, 3)
    img = img.astype(np.uint8)

    # Load model
    interpreter = tf.lite.Interpreter(model_path="detect.tflite")
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Set input tensor
    interpreter.set_tensor(input_details[0]['index'], img)

    # Run inference
    interpreter.invoke()

    # Get output tensors
    boxes = interpreter.get_tensor(output_details[0]['index'])
    labels = interpreter.get_tensor(output_details[1]['index'])
    scores = interpreter.get_tensor(output_details[2]['index'])
    num = interpreter.get_tensor(output_details[3]['index'])

    for i in range(boxes.shape[1]):
        if scores[0, i] > 0.5:
            # Boxes are [ymin, xmin, ymax, xmax] in relative coordinates
            box = boxes[0, i, :]
            x0 = int(box[1] * img_org.shape[1])
            y0 = int(box[0] * img_org.shape[0])
            x1 = int(box[3] * img_org.shape[1])
            y1 = int(box[2] * img_org.shape[0])
            cv2.rectangle(img_org, (x0, y0), (x1, y1), (255, 0, 0), 2)
            cv2.rectangle(img_org, (x0, y0), (x0 + 100, y0 - 30), (255, 0, 0), -1)
            cv2.putText(img_org,
                        str(int(labels[0, i])),
                        (x0, y0),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1,
                        (255, 255, 255),
                        2)

    # cv2.imwrite('output.jpg', img_org)
    cv2.imshow('image', img_org)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
if __name__ == '__main__':
    detect_from_camera()
    detect_from_image()
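# The script above prints raw class indices. A minimal helper for turning them
# into names (a sketch; assumes labelmap.txt extracted from the same model zip,
# one name per line, with "???" placeholders for unused COCO ids):
#
#   def load_labels(path='labelmap.txt'):
#       with open(path) as f:
#           return [line.strip() for line in f]
#
#   label_names = load_labels()
#   name = label_names[int(labels[0, i])]  # verify the offset on a known image;
#                                          # some label files include a background entry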
import time
# import picamera
# import picamera.array
import cv2
from PIL import Image
from edgetpu.detection.engine import DetectionEngine
MODEL_NAME = "mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite"
def cv2pil(image_cv):
    # Convert an OpenCV BGR image to a PIL RGB image
    image_cv = cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB)
    image_pil = Image.fromarray(image_cv)
    image_pil = image_pil.convert('RGB')
    return image_pil
label2string = \
{
0: "person",
1: "bicycle",
2: "car",
3: "motorcycle",
4: "airplane",
5: "bus",
6: "train",
7: "truck",
8: "boat",
9: "traffic light",
10: "fire hydrant",
12: "stop sign",
13: "parking meter",
14: "bench",
15: "bird",
16: "cat",
17: "dog",
18: "horse",
19: "sheep",
20: "cow",
21: "elephant",
22: "bear",
23: "zebra",
24: "giraffe",
26: "backpack",
27: "umbrella",
30: "handbag",
31: "tie",
32: "suitcase",
33: "frisbee",
34: "skis",
35: "snowboard",
36: "sports ball",
37: "kite",
38: "baseball bat",
39: "baseball glove",
40: "skateboard",
41: "surfboard",
42: "tennis racket",
43: "bottle",
45: "wine glass",
46: "cup",
47: "fork",
48: "knife",
49: "spoon",
50: "bowl",
51: "banana",
52: "apple",
53: "sandwich",
54: "orange",
55: "broccoli",
56: "carrot",
57: "hot dog",
58: "pizza",
59: "donut",
60: "cake",
61: "chair",
62: "couch",
63: "potted plant",
64: "bed",
66: "dining table",
69: "toilet",
71: "tv",
72: "laptop",
73: "mouse",
74: "remote",
75: "keyboard",
76: "cell phone",
77: "microwave",
78: "oven",
79: "toaster",
80: "sink",
81: "refrigerator",
83: "book",
84: "clock",
85: "vase",
86: "scissors",
87: "teddy bear",
88: "hair drier",
89: "toothbrush",
}
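# The gaps in the ids above (11, 25, 28, 29, ...) are COCO category ids that
# this 90-entry labelmap leaves unused, so the model should never emit them.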
def detect_from_image():
    # Load model and prepare TPU engine
    engine = DetectionEngine(MODEL_NAME)

    # Prepare input image
    img_org = cv2.imread('input.jpg')
    # cv2.imshow('image', img_org)
    pil_img = cv2pil(cv2.resize(img_org, (300, 300)))

    # Run inference
    ans = engine.DetectWithImage(pil_img, threshold=0.5, keep_aspect_ratio=True, relative_coord=True, top_k=10)

    # Retrieve results
    if ans:
        for obj in ans:
            print('-----------------------------------------')
            print('label = ', label2string[obj.label_id])
            print('score = ', obj.score)
            # Boxes are [xmin, ymin, xmax, ymax] in relative coordinates
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)
            x0 = int(box[0] * img_org.shape[1])
            y0 = int(box[1] * img_org.shape[0])
            x1 = int(box[2] * img_org.shape[1])
            y1 = int(box[3] * img_org.shape[0])
            cv2.rectangle(img_org, (x0, y0), (x1, y1), (255, 0, 0), 2)
            cv2.rectangle(img_org, (x0, y0), (x0 + 100, y0 - 30), (255, 0, 0), -1)
            cv2.putText(img_org,
                        str(label2string[obj.label_id]),
                        (x0, y0),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1,
                        (255, 255, 255),
                        2)

    cv2.imshow('image', img_org)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def detect_from_camera():
    # Load model and prepare TPU engine
    engine = DetectionEngine(MODEL_NAME)

    cap = cv2.VideoCapture(0)

    while True:
        start = time.time()

        # Capture image
        ret, img_org = cap.read()
        # cv2.imshow('image', img_org)
        key = cv2.waitKey(1)
        if key == 27:  # ESC
            break

        pil_img = cv2pil(cv2.resize(img_org, (300, 300)))

        # Run inference
        ans = engine.DetectWithImage(pil_img, threshold=0.5, keep_aspect_ratio=True, relative_coord=True, top_k=10)

        # Retrieve results
        if ans:
            for obj in ans:
                print('-----------------------------------------')
                print('label = ', label2string[obj.label_id])
                print('score = ', obj.score)
                box = obj.bounding_box.flatten().tolist()
                print('box = ', box)
                x0 = int(box[0] * img_org.shape[1])
                y0 = int(box[1] * img_org.shape[0])
                x1 = int(box[2] * img_org.shape[1])
                y1 = int(box[3] * img_org.shape[0])
                cv2.rectangle(img_org, (x0, y0), (x1, y1), (255, 0, 0), 2)
                cv2.rectangle(img_org, (x0, y0), (x0 + 100, y0 - 30), (255, 0, 0), -1)
                cv2.putText(img_org,
                            str(label2string[obj.label_id]),
                            (x0, y0),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            1,
                            (255, 255, 255),
                            2)

        # Draw the result
        cv2.imshow('image', img_org)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

        print('inference time = ', engine.get_inference_time(), '[msec]')
        elapsed_time = time.time() - start
        print('total time = ', elapsed_time * 1000, '[msec] (', 1 / elapsed_time, ' fps)')

    cap.release()
    cv2.destroyAllWindows()
if __name__ == '__main__':
    detect_from_camera()
    # detect_from_image()
'''
for Jetson Nano
sudo apt-get install libjpeg-dev
pip3 install pillow
cd ~/
wget https://dl.google.com/coral/edgetpu_api/edgetpu_api_latest.tar.gz -O edgetpu_api.tar.gz --trust-server-names
tar xzf edgetpu_api.tar.gz
cd edgetpu_api
bash ./install.sh
sudo ln -s /usr/local/lib/python3.6/dist-packages/edgetpu/swig/_edgetpu_cpp_wrapper.cpython-35m-aarch64-linux-gnu.so /usr/local/lib/python3.6/dist-packages/edgetpu/swig/_edgetpu_cpp_wrapper.cpython-36m-aarch64-linux-gnu.so
python3 cv_detection.py
https://dl.google.com/coral/canned_models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite
'''
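# Note: DetectWithImage/DetectWithInputTensor are the pre-2.x Edge TPU Python
# API names; newer edgetpu releases deprecate them in favor of snake_case
# equivalents taking the same arguments, e.g.:
#   ans = engine.detect_with_image(pil_img, threshold=0.5,
#                                  keep_aspect_ratio=True,
#                                  relative_coord=True, top_k=10)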
import time
from PIL import Image, ImageDraw, ImageFont
import numpy
from edgetpu.detection.engine import DetectionEngine
MODEL_NAME = "detect_edgetpu.tflite"
MODEL_WIDTH = 300
MODEL_HEIGHT = 300
def detect_from_image():
    ### Load model and prepare TPU engine
    engine = DetectionEngine(MODEL_NAME)

    ### Prepare input image
    img_org = Image.open('input.jpg')
    draw = ImageDraw.Draw(img_org)
    img_resized = img_org.resize((MODEL_WIDTH, MODEL_HEIGHT))
    input_tensor = numpy.asarray(img_resized).flatten()

    ### Run inference
    # ans = engine.DetectWithImage(img_resized, threshold=0.5, keep_aspect_ratio=True, relative_coord=True, top_k=10)
    ans = engine.DetectWithInputTensor(input_tensor, threshold=0.5, top_k=10)

    ### Retrieve results
    if ans:
        for obj in ans:
            print('-----------------------------------------')
            print('label = ', obj.label_id)
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)
            x0 = int(box[0] * img_org.size[0])
            y0 = int(box[1] * img_org.size[1])
            x1 = int(box[2] * img_org.size[0])
            y1 = int(box[3] * img_org.size[1])
            draw.rectangle((x0, y0, x1, y1), fill=None, outline=(0, 255, 0))
            draw.text((x0, y0), str(obj.label_id), fill=(0, 255, 0))
    img_org.show()

    ### Time measurement
    start = time.time()
    num_measurement = 100
    for i in range(num_measurement):
        engine.RunInference(input_tensor)
    elapsed_time = time.time() - start
    print("elapsed_time:{0}".format(1000 * elapsed_time / num_measurement) + "[msec]")
if __name__ == '__main__':
    detect_from_image()
'''
for Raspberry Pi
sudo apt install imagemagick
pip3 install pillow
python3 jetson_detection_cv.py
https://dl.google.com/coral/canned_models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite
'''
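# Note: DetectWithInputTensor expects the flattened uint8 RGB pixels of an
# already-resized image (here 300*300*3 = 270000 values); it skips the PIL
# resize/conversion that DetectWithImage performs, which is why it pairs well
# with the RunInference timing loop above.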
import picamera
import picamera.array
import cv2
with picamera.PiCamera() as camera:
    with picamera.array.PiRGBArray(camera) as stream:
        camera.resolution = (320, 240)
        while True:
            # Capture a frame into the stream and display it
            camera.capture(stream, 'bgr', use_video_port=True)
            cv2.imshow('frame', stream.array)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
            # Rewind and truncate the stream so it can be reused
            stream.seek(0)
            stream.truncate()
        cv2.destroyAllWindows()
# sudo apt install python3-picamera
# python3 picam_capture.py
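# An equivalent loop using capture_continuous, which avoids re-arming the
# capture each iteration (a sketch against the same picamera API):
#   for _ in camera.capture_continuous(stream, format='bgr', use_video_port=True):
#       cv2.imshow('frame', stream.array)
#       if cv2.waitKey(1) & 0xFF == ord("q"):
#           break
#       stream.seek(0)
#       stream.truncate()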
import time
import picamera
import picamera.array
from PIL import Image
from luma.core.interface.serial import i2c, spi
from luma.core.render import canvas
from luma.oled.device import ssd1306, ssd1309, ssd1325, ssd1331, sh1106
from edgetpu.detection.engine import DetectionEngine
MODEL_NAME = "mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite"
label2string = \
{
0: "person",
1: "bicycle",
2: "car",
3: "motorcycle",
4: "airplane",
5: "bus",
6: "train",
7: "truck",
8: "boat",
9: "traffic light",
10: "fire hydrant",
12: "stop sign",
13: "parking meter",
14: "bench",
15: "bird",
16: "cat",
17: "dog",
18: "horse",
19: "sheep",
20: "cow",
21: "elephant",
22: "bear",
23: "zebra",
24: "giraffe",
26: "backpack",
27: "umbrella",
30: "handbag",
31: "tie",
32: "suitcase",
33: "frisbee",
34: "skis",
35: "snowboard",
36: "sports ball",
37: "kite",
38: "baseball bat",
39: "baseball glove",
40: "skateboard",
41: "surfboard",
42: "tennis racket",
43: "bottle",
45: "wine glass",
46: "cup",
47: "fork",
48: "knife",
49: "spoon",
50: "bowl",
51: "banana",
52: "apple",
53: "sandwich",
54: "orange",
55: "broccoli",
56: "carrot",
57: "hot dog",
58: "pizza",
59: "donut",
60: "cake",
61: "chair",
62: "couch",
63: "potted plant",
64: "bed",
66: "dining table",
69: "toilet",
71: "tv",
72: "laptop",
73: "mouse",
74: "remote",
75: "keyboard",
76: "cell phone",
77: "microwave",
78: "oven",
79: "toaster",
80: "sink",
81: "refrigerator",
83: "book",
84: "clock",
85: "vase",
86: "scissors",
87: "teddy bear",
88: "hair drier",
89: "toothbrush",
}
def detect_from_camera():
    # Load model and prepare TPU engine
    engine = DetectionEngine(MODEL_NAME)

    # Prepare OLED
    serial = i2c(port=1, address=0x3C)
    device = sh1106(serial)

    with picamera.PiCamera() as camera:
        with picamera.array.PiRGBArray(camera) as stream:
            camera.resolution = (640, 480)
            while True:
                start = time.time()

                # Capture image
                camera.capture(stream, 'rgb', use_video_port=True)
                pil_img = Image.fromarray(stream.array)
                pil_img = pil_img.resize((300, 300), Image.NEAREST)

                # Run inference
                ans = engine.DetectWithImage(pil_img, threshold=0.5, keep_aspect_ratio=True, relative_coord=True, top_k=10)

                # Retrieve results and draw them on the OLED
                with canvas(device) as draw:
                    if ans:
                        for obj in ans:
                            print('-----------------------------------------')
                            print('label = ', label2string[obj.label_id])
                            print('score = ', obj.score)
                            box = obj.bounding_box.flatten().tolist()
                            print('box = ', box)
                            x0 = int(box[0] * 128)
                            y0 = int(box[1] * 64)
                            x1 = int(box[2] * 128)
                            y1 = int(box[3] * 64)
                            draw.rectangle((x0, y0, x1, y1), outline="white", fill=None)
                            draw.text((x0, y0), label2string[obj.label_id], fill="white")

                print('inference time = ', engine.get_inference_time(), '[msec]')
                elapsed_time = time.time() - start
                print('total time = ', elapsed_time * 1000, '[msec] (', 1 / elapsed_time, ' fps)')

                # Rewind and truncate the stream so it can be reused
                stream.seek(0)
                stream.truncate()
if __name__ == '__main__':
    detect_from_camera()
'''
for Raspberry Pi Zero W
Connect the OLED (SH1106) I2C lines to pins 3 (SDA) and 5 (SCL)
```
sudo apt-get install i2c-tools
sudo raspi-config
# enable camera and i2c
i2cdetect -y 1
wget https://github.com/google-coral/edgetpu-platforms/releases/download/v1.9.2/edgetpu_api_1.9.2.tar.gz
tar xzf edgetpu_api_1.9.2.tar.gz
cd edgetpu_api/
./install.sh
wget https://dl.google.com/coral/canned_models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite
sudo apt install python3-picamera
sudo apt-get install libfreetype6-dev libjpeg-dev build-essential
sudo pip3 install luma.oled
```
'''
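# Note: the hardcoded 128 and 64 above match the SH1106's 128x64 panel; the
# luma device object exposes the same values, so the magic numbers can be
# avoided (same luma.oled API):
#   x0 = int(box[0] * device.width)
#   y0 = int(box[1] * device.height)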
import time
import picamera
import picamera.array
import cv2
from PIL import Image
from edgetpu.detection.engine import DetectionEngine
MODEL_NAME = "mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite"
def cv2pil(image_cv):
    # Convert an OpenCV BGR image to a PIL RGB image
    image_cv = cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB)
    image_pil = Image.fromarray(image_cv)
    image_pil = image_pil.convert('RGB')
    return image_pil
label2string = \
{
0: "person",
1: "bicycle",
2: "car",
3: "motorcycle",
4: "airplane",
5: "bus",
6: "train",
7: "truck",
8: "boat",
9: "traffic light",
10: "fire hydrant",
12: "stop sign",
13: "parking meter",
14: "bench",
15: "bird",
16: "cat",
17: "dog",
18: "horse",
19: "sheep",
20: "cow",
21: "elephant",
22: "bear",
23: "zebra",
24: "giraffe",
26: "backpack",
27: "umbrella",
30: "handbag",
31: "tie",
32: "suitcase",
33: "frisbee",
34: "skis",
35: "snowboard",
36: "sports ball",
37: "kite",
38: "baseball bat",
39: "baseball glove",
40: "skateboard",
41: "surfboard",
42: "tennis racket",
43: "bottle",
45: "wine glass",
46: "cup",
47: "fork",
48: "knife",
49: "spoon",
50: "bowl",
51: "banana",
52: "apple",
53: "sandwich",
54: "orange",
55: "broccoli",
56: "carrot",
57: "hot dog",
58: "pizza",
59: "donut",
60: "cake",
61: "chair",
62: "couch",
63: "potted plant",
64: "bed",
66: "dining table",
69: "toilet",
71: "tv",
72: "laptop",
73: "mouse",
74: "remote",
75: "keyboard",
76: "cell phone",
77: "microwave",
78: "oven",
79: "toaster",
80: "sink",
81: "refrigerator",
83: "book",
84: "clock",
85: "vase",
86: "scissors",
87: "teddy bear",
88: "hair drier",
89: "toothbrush",
}
def detect_from_image():
    # Load model and prepare TPU engine
    engine = DetectionEngine(MODEL_NAME)

    # Prepare input image
    img_org = cv2.imread('input.jpg')
    # cv2.imshow('image', img_org)
    pil_img = cv2pil(cv2.resize(img_org, (300, 300)))

    # Run inference
    ans = engine.DetectWithImage(pil_img, threshold=0.5, keep_aspect_ratio=True, relative_coord=True, top_k=10)

    # Retrieve results
    if ans:
        for obj in ans:
            print('-----------------------------------------')
            print('label = ', label2string[obj.label_id])
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)
            x0 = int(box[0] * img_org.shape[1])
            y0 = int(box[1] * img_org.shape[0])
            x1 = int(box[2] * img_org.shape[1])
            y1 = int(box[3] * img_org.shape[0])
            cv2.rectangle(img_org, (x0, y0), (x1, y1), (255, 0, 0), 2)
            cv2.rectangle(img_org, (x0, y0), (x0 + 100, y0 - 30), (255, 0, 0), -1)
            cv2.putText(img_org,
                        str(label2string[obj.label_id]),
                        (x0, y0),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1,
                        (255, 255, 255),
                        2)

    cv2.imshow('image', img_org)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def detect_from_camera():
    # Load model and prepare TPU engine
    engine = DetectionEngine(MODEL_NAME)

    # Start camera capturing
    with picamera.PiCamera() as camera:
        with picamera.array.PiRGBArray(camera) as stream:
            camera.resolution = (640, 480)
            while True:
                start = time.time()

                # Capture image
                camera.capture(stream, 'bgr', use_video_port=True)
                image_display = stream.array
                pil_img = cv2pil(cv2.resize(stream.array, (300, 300)))

                # Run inference
                ans = engine.DetectWithImage(pil_img, threshold=0.5, keep_aspect_ratio=True, relative_coord=True, top_k=10)

                # Retrieve results
                if ans:
                    for obj in ans:
                        print('-----------------------------------------')
                        print('label = ', label2string[obj.label_id])
                        print('score = ', obj.score)
                        box = obj.bounding_box.flatten().tolist()
                        print('box = ', box)
                        x0 = int(box[0] * image_display.shape[1])
                        y0 = int(box[1] * image_display.shape[0])
                        x1 = int(box[2] * image_display.shape[1])
                        y1 = int(box[3] * image_display.shape[0])
                        cv2.rectangle(image_display, (x0, y0), (x1, y1), (255, 0, 0), 2)
                        cv2.rectangle(image_display, (x0, y0), (x0 + 100, y0 - 30), (255, 0, 0), -1)
                        cv2.putText(image_display,
                                    str(label2string[obj.label_id]),
                                    (x0, y0),
                                    cv2.FONT_HERSHEY_SIMPLEX,
                                    1,
                                    (255, 255, 255),
                                    2)

                # Draw the result
                cv2.imshow('image', image_display)
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break

                print('inference time = ', engine.get_inference_time(), '[msec]')
                elapsed_time = time.time() - start
                print('total time = ', elapsed_time * 1000, '[msec] (', 1 / elapsed_time, ' fps)')

                # Rewind and truncate the stream so it can be reused
                stream.seek(0)
                stream.truncate()

    cv2.destroyAllWindows()
if __name__ == '__main__':
    detect_from_camera()
    detect_from_image()
'''
for Raspberry Pi 3
cd ~/
wget https://dl.google.com/coral/edgetpu_api/edgetpu_api_latest.tar.gz -O edgetpu_api.tar.gz --trust-server-names
tar xzf edgetpu_api.tar.gz
cd edgetpu_api
bash ./install.sh
sudo apt install python3-picamera
python3 picam_capture.py
https://dl.google.com/coral/canned_models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite
'''