Object detection using MobileNet SSD with TensorFlow Lite (with and without Edge TPU)
# -*- coding: utf-8 -*-
import cv2
import tensorflow as tf
import numpy as np
# https://www.tensorflow.org/lite/guide/hosted_models
# http://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip
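# To fetch the model (a sketch; assumes wget/unzip are available and that the
# zip contents match the hosted-models page, i.e. detect.tflite + labelmap.txt):
#   wget http://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip
#   unzip coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip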
def detect_from_camera():
    # Load model
    interpreter = tf.lite.Interpreter(model_path="detect.tflite")
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    cap = cv2.VideoCapture(0)  # 0 is the camera device number

    while True:
        # Capture image
        ret, img_org = cap.read()
        # cv2.imshow('image', img_org)
        key = cv2.waitKey(1)
        if key == 27:  # ESC
            break

        # Prepare input image
        img = cv2.cvtColor(img_org, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (300, 300))
        img = img.reshape(1, img.shape[0], img.shape[1], img.shape[2])  # (1, 300, 300, 3)
        img = img.astype(np.uint8)

        # Set input tensor
        interpreter.set_tensor(input_details[0]['index'], img)

        # Run inference
        interpreter.invoke()

        # Get output tensors
        boxes = interpreter.get_tensor(output_details[0]['index'])
        labels = interpreter.get_tensor(output_details[1]['index'])
        scores = interpreter.get_tensor(output_details[2]['index'])
        num = interpreter.get_tensor(output_details[3]['index'])

        for i in range(boxes.shape[1]):
            if scores[0, i] > 0.5:
                # Boxes are [ymin, xmin, ymax, xmax] in relative coordinates
                box = boxes[0, i, :]
                x0 = int(box[1] * img_org.shape[1])
                y0 = int(box[0] * img_org.shape[0])
                x1 = int(box[3] * img_org.shape[1])
                y1 = int(box[2] * img_org.shape[0])
                cv2.rectangle(img_org, (x0, y0), (x1, y1), (255, 0, 0), 2)
                cv2.rectangle(img_org, (x0, y0), (x0 + 100, y0 - 30), (255, 0, 0), -1)
                cv2.putText(img_org,
                            str(int(labels[0, i])),
                            (x0, y0),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            1,
                            (255, 255, 255),
                            2)

        # cv2.imwrite('output.jpg', img_org)
        cv2.imshow('image', img_org)

    cap.release()
    cv2.destroyAllWindows()
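# Note: detect.tflite here is the uint8-quantized model, so the raw 0-255 RGB
# pixels are fed directly with no mean/std normalization; a float model would
# instead need a normalized float32 input tensor.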
def detect_from_image():
    # Prepare input image
    img_org = cv2.imread('input.jpg')
    # cv2.imshow('image', img_org)
    img = cv2.cvtColor(img_org, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (300, 300))
    img = img.reshape(1, img.shape[0], img.shape[1], img.shape[2])  # (1, 300, 300, 3)
    img = img.astype(np.uint8)

    # Load model
    interpreter = tf.lite.Interpreter(model_path="detect.tflite")
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Set input tensor
    interpreter.set_tensor(input_details[0]['index'], img)

    # Run inference
    interpreter.invoke()

    # Get output tensors
    boxes = interpreter.get_tensor(output_details[0]['index'])
    labels = interpreter.get_tensor(output_details[1]['index'])
    scores = interpreter.get_tensor(output_details[2]['index'])
    num = interpreter.get_tensor(output_details[3]['index'])

    for i in range(boxes.shape[1]):
        if scores[0, i] > 0.5:
            # Boxes are [ymin, xmin, ymax, xmax] in relative coordinates
            box = boxes[0, i, :]
            x0 = int(box[1] * img_org.shape[1])
            y0 = int(box[0] * img_org.shape[0])
            x1 = int(box[3] * img_org.shape[1])
            y1 = int(box[2] * img_org.shape[0])
            cv2.rectangle(img_org, (x0, y0), (x1, y1), (255, 0, 0), 2)
            cv2.rectangle(img_org, (x0, y0), (x0 + 100, y0 - 30), (255, 0, 0), -1)
            cv2.putText(img_org,
                        str(int(labels[0, i])),
                        (x0, y0),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1,
                        (255, 255, 255),
                        2)

    # cv2.imwrite('output.jpg', img_org)
    cv2.imshow('image', img_org)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
if __name__ == '__main__':
    detect_from_camera()
    detect_from_image()
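# The script above prints raw class indices. A minimal helper for turning them
# into names (a sketch; assumes labelmap.txt extracted from the same model zip,
# one name per line, with "???" placeholders for unused COCO ids):
#
#   def load_labels(path='labelmap.txt'):
#       with open(path) as f:
#           return [line.strip() for line in f]
#
#   label_names = load_labels()
#   name = label_names[int(labels[0, i])]  # verify the offset on a known image;
#                                          # some label files include a background entry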
import time
# import picamera
# import picamera.array
import cv2
from PIL import Image
from edgetpu.detection.engine import DetectionEngine
MODEL_NAME = "mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite"
def cv2pil(image_cv):
    # Convert an OpenCV BGR image to a PIL RGB image
    image_cv = cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB)
    image_pil = Image.fromarray(image_cv)
    image_pil = image_pil.convert('RGB')
    return image_pil
label2string = \
{
0: "person",
1: "bicycle",
2: "car",
3: "motorcycle",
4: "airplane",
5: "bus",
6: "train",
7: "truck",
8: "boat",
9: "traffic light",
10: "fire hydrant",
12: "stop sign",
13: "parking meter",
14: "bench",
15: "bird",
16: "cat",
17: "dog",
18: "horse",
19: "sheep",
20: "cow",
21: "elephant",
22: "bear",
23: "zebra",
24: "giraffe",
26: "backpack",
27: "umbrella",
30: "handbag",
31: "tie",
32: "suitcase",
33: "frisbee",
34: "skis",
35: "snowboard",
36: "sports ball",
37: "kite",
38: "baseball bat",
39: "baseball glove",
40: "skateboard",
41: "surfboard",
42: "tennis racket",
43: "bottle",
45: "wine glass",
46: "cup",
47: "fork",
48: "knife",
49: "spoon",
50: "bowl",
51: "banana",
52: "apple",
53: "sandwich",
54: "orange",
55: "broccoli",
56: "carrot",
57: "hot dog",
58: "pizza",
59: "donut",
60: "cake",
61: "chair",
62: "couch",
63: "potted plant",
64: "bed",
66: "dining table",
69: "toilet",
71: "tv",
72: "laptop",
73: "mouse",
74: "remote",
75: "keyboard",
76: "cell phone",
77: "microwave",
78: "oven",
79: "toaster",
80: "sink",
81: "refrigerator",
83: "book",
84: "clock",
85: "vase",
86: "scissors",
87: "teddy bear",
88: "hair drier",
89: "toothbrush",
}
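# The gaps in the ids above (11, 25, 28, 29, ...) are COCO category ids that
# this 90-entry labelmap leaves unused, so the model should never emit them.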
def detect_from_image():
    # Load model and prepare TPU engine
    engine = DetectionEngine(MODEL_NAME)

    # Prepare input image
    img_org = cv2.imread('input.jpg')
    # cv2.imshow('image', img_org)
    pil_img = cv2pil(cv2.resize(img_org, (300, 300)))

    # Run inference
    ans = engine.DetectWithImage(pil_img, threshold=0.5, keep_aspect_ratio=True, relative_coord=True, top_k=10)

    # Retrieve results
    if ans:
        for obj in ans:
            print('-----------------------------------------')
            print('label = ', label2string[obj.label_id])
            print('score = ', obj.score)
            # Boxes are [xmin, ymin, xmax, ymax] in relative coordinates
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)
            x0 = int(box[0] * img_org.shape[1])
            y0 = int(box[1] * img_org.shape[0])
            x1 = int(box[2] * img_org.shape[1])
            y1 = int(box[3] * img_org.shape[0])
            cv2.rectangle(img_org, (x0, y0), (x1, y1), (255, 0, 0), 2)
            cv2.rectangle(img_org, (x0, y0), (x0 + 100, y0 - 30), (255, 0, 0), -1)
            cv2.putText(img_org,
                        str(label2string[obj.label_id]),
                        (x0, y0),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1,
                        (255, 255, 255),
                        2)

    cv2.imshow('image', img_org)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def detect_from_camera():
    # Load model and prepare TPU engine
    engine = DetectionEngine(MODEL_NAME)

    cap = cv2.VideoCapture(0)

    while True:
        start = time.time()

        # Capture image
        ret, img_org = cap.read()
        # cv2.imshow('image', img_org)
        key = cv2.waitKey(1)
        if key == 27:  # ESC
            break

        pil_img = cv2pil(cv2.resize(img_org, (300, 300)))

        # Run inference
        ans = engine.DetectWithImage(pil_img, threshold=0.5, keep_aspect_ratio=True, relative_coord=True, top_k=10)

        # Retrieve results
        if ans:
            for obj in ans:
                print('-----------------------------------------')
                print('label = ', label2string[obj.label_id])
                print('score = ', obj.score)
                box = obj.bounding_box.flatten().tolist()
                print('box = ', box)
                x0 = int(box[0] * img_org.shape[1])
                y0 = int(box[1] * img_org.shape[0])
                x1 = int(box[2] * img_org.shape[1])
                y1 = int(box[3] * img_org.shape[0])
                cv2.rectangle(img_org, (x0, y0), (x1, y1), (255, 0, 0), 2)
                cv2.rectangle(img_org, (x0, y0), (x0 + 100, y0 - 30), (255, 0, 0), -1)
                cv2.putText(img_org,
                            str(label2string[obj.label_id]),
                            (x0, y0),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            1,
                            (255, 255, 255),
                            2)

        # Draw the result
        cv2.imshow('image', img_org)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

        print('inference time = ', engine.get_inference_time(), '[msec]')
        elapsed_time = time.time() - start
        print('total time = ', elapsed_time * 1000, '[msec] (', 1 / elapsed_time, ' fps)')

    cap.release()
    cv2.destroyAllWindows()
if __name__ == '__main__':
    detect_from_camera()
    # detect_from_image()
'''
for Jetson Nano
sudo apt-get install libjpeg-dev
pip3 install pillow
cd ~/
wget https://dl.google.com/coral/edgetpu_api/edgetpu_api_latest.tar.gz -O edgetpu_api.tar.gz --trust-server-names
tar xzf edgetpu_api.tar.gz
cd edgetpu_api
bash ./install.sh
sudo ln -s /usr/local/lib/python3.6/dist-packages/edgetpu/swig/_edgetpu_cpp_wrapper.cpython-35m-aarch64-linux-gnu.so /usr/local/lib/python3.6/dist-packages/edgetpu/swig/_edgetpu_cpp_wrapper.cpython-36m-aarch64-linux-gnu.so
python3 cv_detection.py
https://dl.google.com/coral/canned_models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite
'''
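# Note: DetectWithImage/DetectWithInputTensor are the pre-2.x Edge TPU Python
# API names; newer edgetpu releases deprecate them in favor of snake_case
# equivalents taking the same arguments, e.g.:
#   ans = engine.detect_with_image(pil_img, threshold=0.5,
#                                  keep_aspect_ratio=True,
#                                  relative_coord=True, top_k=10)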
import time
from PIL import Image, ImageDraw, ImageFont
import numpy
from edgetpu.detection.engine import DetectionEngine
MODEL_NAME = "detect_edgetpu.tflite"
MODEL_WIDTH = 300
MODEL_HEIGHT = 300
def detect_from_image():
    ### Load model and prepare TPU engine
    engine = DetectionEngine(MODEL_NAME)

    ### Prepare input image
    img_org = Image.open('input.jpg')
    draw = ImageDraw.Draw(img_org)
    img_resized = img_org.resize((MODEL_WIDTH, MODEL_HEIGHT))
    input_tensor = numpy.asarray(img_resized).flatten()

    ### Run inference
    # ans = engine.DetectWithImage(img_resized, threshold=0.5, keep_aspect_ratio=True, relative_coord=True, top_k=10)
    ans = engine.DetectWithInputTensor(input_tensor, threshold=0.5, top_k=10)

    ### Retrieve results
    if ans:
        for obj in ans:
            print('-----------------------------------------')
            print('label = ', obj.label_id)
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)
            x0 = int(box[0] * img_org.size[0])
            y0 = int(box[1] * img_org.size[1])
            x1 = int(box[2] * img_org.size[0])
            y1 = int(box[3] * img_org.size[1])
            draw.rectangle((x0, y0, x1, y1), fill=None, outline=(0, 255, 0))
            draw.text((x0, y0), str(obj.label_id), fill=(0, 255, 0))
    img_org.show()

    ### Time measurement
    start = time.time()
    num_measurement = 100
    for i in range(num_measurement):
        engine.RunInference(input_tensor)
    elapsed_time = time.time() - start
    print("elapsed_time:{0}".format(1000 * elapsed_time / num_measurement) + "[msec]")
if __name__ == '__main__':
    detect_from_image()
'''
for Raspberry Pi
sudo apt install imagemagick
pip3 install pillow
python3 jetson_detection_cv.py
https://dl.google.com/coral/canned_models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite
'''
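# Note: DetectWithInputTensor expects the flattened uint8 RGB pixels of an
# already-resized image (here 300*300*3 = 270000 values); it skips the PIL
# resize/conversion that DetectWithImage performs, which is why it pairs well
# with the RunInference timing loop above.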
import picamera
import picamera.array
import cv2
with picamera.PiCamera() as camera:
    with picamera.array.PiRGBArray(camera) as stream:
        camera.resolution = (320, 240)
        while True:
            # Capture a frame into the stream and display it
            camera.capture(stream, 'bgr', use_video_port=True)
            cv2.imshow('frame', stream.array)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
            # Rewind and truncate the stream so it can be reused
            stream.seek(0)
            stream.truncate()
        cv2.destroyAllWindows()
# sudo apt install python3-picamera
# python3 picam_capture.py
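# An equivalent loop using capture_continuous, which avoids re-arming the
# capture each iteration (a sketch against the same picamera API):
#   for _ in camera.capture_continuous(stream, format='bgr', use_video_port=True):
#       cv2.imshow('frame', stream.array)
#       if cv2.waitKey(1) & 0xFF == ord("q"):
#           break
#       stream.seek(0)
#       stream.truncate()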
import time
import picamera
import picamera.array
from PIL import Image
from luma.core.interface.serial import i2c, spi
from luma.core.render import canvas
from luma.oled.device import ssd1306, ssd1309, ssd1325, ssd1331, sh1106
from edgetpu.detection.engine import DetectionEngine
MODEL_NAME = "mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite"
label2string = \
{
0: "person",
1: "bicycle",
2: "car",
3: "motorcycle",
4: "airplane",
5: "bus",
6: "train",
7: "truck",
8: "boat",
9: "traffic light",
10: "fire hydrant",
12: "stop sign",
13: "parking meter",
14: "bench",
15: "bird",
16: "cat",
17: "dog",
18: "horse",
19: "sheep",
20: "cow",
21: "elephant",
22: "bear",
23: "zebra",
24: "giraffe",
26: "backpack",
27: "umbrella",
30: "handbag",
31: "tie",
32: "suitcase",
33: "frisbee",
34: "skis",
35: "snowboard",
36: "sports ball",
37: "kite",
38: "baseball bat",
39: "baseball glove",
40: "skateboard",
41: "surfboard",
42: "tennis racket",
43: "bottle",
45: "wine glass",
46: "cup",
47: "fork",
48: "knife",
49: "spoon",
50: "bowl",
51: "banana",
52: "apple",
53: "sandwich",
54: "orange",
55: "broccoli",
56: "carrot",
57: "hot dog",
58: "pizza",
59: "donut",
60: "cake",
61: "chair",
62: "couch",
63: "potted plant",
64: "bed",
66: "dining table",
69: "toilet",
71: "tv",
72: "laptop",
73: "mouse",
74: "remote",
75: "keyboard",
76: "cell phone",
77: "microwave",
78: "oven",
79: "toaster",
80: "sink",
81: "refrigerator",
83: "book",
84: "clock",
85: "vase",
86: "scissors",
87: "teddy bear",
88: "hair drier",
89: "toothbrush",
}
def detect_from_camera():
    # Load model and prepare TPU engine
    engine = DetectionEngine(MODEL_NAME)

    # Prepare OLED
    serial = i2c(port=1, address=0x3C)
    device = sh1106(serial)

    with picamera.PiCamera() as camera:
        with picamera.array.PiRGBArray(camera) as stream:
            camera.resolution = (640, 480)
            while True:
                start = time.time()

                # Capture image
                camera.capture(stream, 'rgb', use_video_port=True)
                pil_img = Image.fromarray(stream.array)
                pil_img = pil_img.resize((300, 300), Image.NEAREST)

                # Run inference
                ans = engine.DetectWithImage(pil_img, threshold=0.5, keep_aspect_ratio=True, relative_coord=True, top_k=10)

                # Retrieve results and draw them on the OLED
                with canvas(device) as draw:
                    if ans:
                        for obj in ans:
                            print('-----------------------------------------')
                            print('label = ', label2string[obj.label_id])
                            print('score = ', obj.score)
                            box = obj.bounding_box.flatten().tolist()
                            print('box = ', box)
                            x0 = int(box[0] * 128)
                            y0 = int(box[1] * 64)
                            x1 = int(box[2] * 128)
                            y1 = int(box[3] * 64)
                            draw.rectangle((x0, y0, x1, y1), outline="white", fill=None)
                            draw.text((x0, y0), label2string[obj.label_id], fill="white")

                print('inference time = ', engine.get_inference_time(), '[msec]')
                elapsed_time = time.time() - start
                print('total time = ', elapsed_time * 1000, '[msec] (', 1 / elapsed_time, ' fps)')

                # Rewind and truncate the stream so it can be reused
                stream.seek(0)
                stream.truncate()
if __name__ == '__main__':
    detect_from_camera()
'''
for Raspberry Pi Zero W
Connect the OLED (SH1106) I2C lines to pins 3 (SDA) and 5 (SCL)
```
sudo apt-get install i2c-tools
sudo raspi-config
# enable camera and i2c
i2cdetect -y 1
wget https://github.com/google-coral/edgetpu-platforms/releases/download/v1.9.2/edgetpu_api_1.9.2.tar.gz
tar xzf edgetpu_api_1.9.2.tar.gz
cd edgetpu_api/
./install.sh
wget https://dl.google.com/coral/canned_models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite
sudo apt install python3-picamera
sudo apt-get install libfreetype6-dev libjpeg-dev build-essential
sudo pip3 install luma.oled
```
'''
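# Note: the hardcoded 128 and 64 above match the SH1106's 128x64 panel; the
# luma device object exposes the same values, so the magic numbers can be
# avoided (same luma.oled API):
#   x0 = int(box[0] * device.width)
#   y0 = int(box[1] * device.height)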
import time
import picamera
import picamera.array
import cv2
from PIL import Image
from edgetpu.detection.engine import DetectionEngine
MODEL_NAME = "mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite"
def cv2pil(image_cv):
    # Convert an OpenCV BGR image to a PIL RGB image
    image_cv = cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB)
    image_pil = Image.fromarray(image_cv)
    image_pil = image_pil.convert('RGB')
    return image_pil
label2string = \
{
0: "person",
1: "bicycle",
2: "car",
3: "motorcycle",
4: "airplane",
5: "bus",
6: "train",
7: "truck",
8: "boat",
9: "traffic light",
10: "fire hydrant",
12: "stop sign",
13: "parking meter",
14: "bench",
15: "bird",
16: "cat",
17: "dog",
18: "horse",
19: "sheep",
20: "cow",
21: "elephant",
22: "bear",
23: "zebra",
24: "giraffe",
26: "backpack",
27: "umbrella",
30: "handbag",
31: "tie",
32: "suitcase",
33: "frisbee",
34: "skis",
35: "snowboard",
36: "sports ball",
37: "kite",
38: "baseball bat",
39: "baseball glove",
40: "skateboard",
41: "surfboard",
42: "tennis racket",
43: "bottle",
45: "wine glass",
46: "cup",
47: "fork",
48: "knife",
49: "spoon",
50: "bowl",
51: "banana",
52: "apple",
53: "sandwich",
54: "orange",
55: "broccoli",
56: "carrot",
57: "hot dog",
58: "pizza",
59: "donut",
60: "cake",
61: "chair",
62: "couch",
63: "potted plant",
64: "bed",
66: "dining table",
69: "toilet",
71: "tv",
72: "laptop",
73: "mouse",
74: "remote",
75: "keyboard",
76: "cell phone",
77: "microwave",
78: "oven",
79: "toaster",
80: "sink",
81: "refrigerator",
83: "book",
84: "clock",
85: "vase",
86: "scissors",
87: "teddy bear",
88: "hair drier",
89: "toothbrush",
}
def detect_from_image():
    # Load model and prepare TPU engine
    engine = DetectionEngine(MODEL_NAME)

    # Prepare input image
    img_org = cv2.imread('input.jpg')
    # cv2.imshow('image', img_org)
    pil_img = cv2pil(cv2.resize(img_org, (300, 300)))

    # Run inference
    ans = engine.DetectWithImage(pil_img, threshold=0.5, keep_aspect_ratio=True, relative_coord=True, top_k=10)

    # Retrieve results
    if ans:
        for obj in ans:
            print('-----------------------------------------')
            print('label = ', label2string[obj.label_id])
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)
            x0 = int(box[0] * img_org.shape[1])
            y0 = int(box[1] * img_org.shape[0])
            x1 = int(box[2] * img_org.shape[1])
            y1 = int(box[3] * img_org.shape[0])
            cv2.rectangle(img_org, (x0, y0), (x1, y1), (255, 0, 0), 2)
            cv2.rectangle(img_org, (x0, y0), (x0 + 100, y0 - 30), (255, 0, 0), -1)
            cv2.putText(img_org,
                        str(label2string[obj.label_id]),
                        (x0, y0),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1,
                        (255, 255, 255),
                        2)

    cv2.imshow('image', img_org)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def detect_from_camera():
    # Load model and prepare TPU engine
    engine = DetectionEngine(MODEL_NAME)

    # Start camera capturing
    with picamera.PiCamera() as camera:
        with picamera.array.PiRGBArray(camera) as stream:
            camera.resolution = (640, 480)
            while True:
                start = time.time()

                # Capture image
                camera.capture(stream, 'bgr', use_video_port=True)
                image_display = stream.array
                pil_img = cv2pil(cv2.resize(stream.array, (300, 300)))

                # Run inference
                ans = engine.DetectWithImage(pil_img, threshold=0.5, keep_aspect_ratio=True, relative_coord=True, top_k=10)

                # Retrieve results
                if ans:
                    for obj in ans:
                        print('-----------------------------------------')
                        print('label = ', label2string[obj.label_id])
                        print('score = ', obj.score)
                        box = obj.bounding_box.flatten().tolist()
                        print('box = ', box)
                        x0 = int(box[0] * image_display.shape[1])
                        y0 = int(box[1] * image_display.shape[0])
                        x1 = int(box[2] * image_display.shape[1])
                        y1 = int(box[3] * image_display.shape[0])
                        cv2.rectangle(image_display, (x0, y0), (x1, y1), (255, 0, 0), 2)
                        cv2.rectangle(image_display, (x0, y0), (x0 + 100, y0 - 30), (255, 0, 0), -1)
                        cv2.putText(image_display,
                                    str(label2string[obj.label_id]),
                                    (x0, y0),
                                    cv2.FONT_HERSHEY_SIMPLEX,
                                    1,
                                    (255, 255, 255),
                                    2)

                # Draw the result
                cv2.imshow('image', image_display)
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break

                print('inference time = ', engine.get_inference_time(), '[msec]')
                elapsed_time = time.time() - start
                print('total time = ', elapsed_time * 1000, '[msec] (', 1 / elapsed_time, ' fps)')

                # Rewind and truncate the stream so it can be reused
                stream.seek(0)
                stream.truncate()

    cv2.destroyAllWindows()
if __name__ == '__main__':
    detect_from_camera()
    detect_from_image()
'''
for Raspberry Pi 3
cd ~/
wget https://dl.google.com/coral/edgetpu_api/edgetpu_api_latest.tar.gz -O edgetpu_api.tar.gz --trust-server-names
tar xzf edgetpu_api.tar.gz
cd edgetpu_api
bash ./install.sh
sudo apt install python3-picamera
python3 picam_capture.py
https://dl.google.com/coral/canned_models/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite
'''