# Created February 5, 2019 21:58
# Refactored version of the ImageAI object detection, for YOLOv3 only
# credits to @OlafenwaMoses
# MIT License
# Van Hoey S.
import os
import cv2
import numpy as np
from PIL import Image
from keras import backend as K
from keras.layers import Input
# needs to be extracted
from imageai.Detection.YOLOv3.models import yolo_main
from imageai.Detection.keras_retinanet.utils.colors import label_color
from imageai.Detection.YOLOv3.utils import yolo_eval
from imageai.Detection.keras_retinanet.utils.visualization import draw_box, draw_caption
class YoloObjectDetection:
    """YOLOv3 object detector built on the ImageAI YOLOv3 Keras model.

    Typical use::

        detector = YoloObjectDetection("yolo.h5", detection_speed="fast")
        annotated, detections = detector.detect_objects(frame)

    NOTE(review): this class was reconstructed from a copy of the file in
    which indentation and several lines were lost (decorators, the
    ``sess.run`` call, ``continue`` statements, ...). Spots that had to be
    inferred are marked with ``NOTE(review)`` below -- please confirm them
    against the upstream ImageAI implementation.
    """

    # Square network input size used for each supported detection speed.
    _speed_image_sizes = {
        "normal": (416, 416),
        "fast": (320, 320),
        "faster": (208, 208),
        "fastest": (128, 128),
        "flash": (96, 96),
    }
    # Public list of accepted `detection_speed` values (same order as before).
    speed_options = list(_speed_image_sizes)

    # Class index -> human readable label (80 entries, presumably the COCO
    # label set the pretrained weights were trained on).
    numbers_to_names = {
        0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train',
        7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter',
        13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow',
        20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag',
        27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite',
        34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket',
        39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl',
        46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot',
        52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant',
        59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote',
        66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink',
        72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear',
        78: 'hair drier', 79: 'toothbrush'}

    # Thresholds handed to `yolo_eval` as `iou_threshold` / `score_threshold`.
    _yolo_iou = 0.45
    _yolo_score = 0.1
    # Nine anchor priors; `len(...) // 3` of them are passed to `yolo_main`.
    _yolo_anchors = np.array([[10., 13.], [16., 30.], [33., 23.],
                              [30., 61.], [62., 45.], [59., 119.],
                              [116., 90.], [156., 198.], [373., 326.]])

    def __init__(self, path_model, detection_speed="normal"):
        """Create a detector and build the model for `detection_speed`.

        Parameters
        ----------
        path_model : str
            Path of the YOLOv3 `.h5` model file.
        detection_speed : str
            One of `speed_options`.

        Raises
        ------
        FileNotFoundError
            If `path_model` is not an existing file.
        ValueError
            If `detection_speed` is not one of `speed_options`.
        """
        # Fail early: previously a bad path left `_model_path` unset and only
        # surfaced later as an AttributeError.
        if not os.path.isfile(path_model):
            raise FileNotFoundError(
                "No YOLOv3 model file found at {!r}".format(path_model))
        self._model_path = path_model

        # placeholders for config; the three outputs are filled in by
        # `prepare_model`
        self._yolo_boxes, self._yolo_scores, self._yolo_classes = None, None, None
        self._yolo_input_image_shape = K.placeholder(shape=(2,))

        # load the model: assigning the property picks the input size and
        # calls `prepare_model`
        self._yolo_model_image_size = (416, 416)
        self.detection_speed = detection_speed

        # Keras session
        self.sess = K.get_session()

    @property
    def model_path(self):
        """Location of the yolo `.h5` model"""
        return self._model_path

    @property
    def detection_speed(self):
        """Current detection speed, one of `speed_options`.

        Assigning a new value selects the matching network input size and
        rebuilds the model through `prepare_model`.
        """
        return self._detection_speed

    @detection_speed.setter
    def detection_speed(self, speed):
        if speed not in self._speed_image_sizes:
            # ValueError is still an `Exception`, so existing handlers work.
            raise ValueError("Invalid speed parameter used.")
        self._detection_speed = speed
        self._yolo_model_image_size = self._speed_image_sizes[speed]
        self._model = self.prepare_model(self._detection_speed)

    def prepare_model(self, detection_speed="normal"):
        """Load the model with the given detection speed

        Builds the YOLOv3 network, loads the weights from `model_path` and
        stores the `yolo_eval` output tensors on the instance.

        Parameters
        ----------
        detection_speed : str
            Not used by this method; the input size is selected by the
            `detection_speed` property setter. Kept for compatibility.

        Returns
        -------
        The model object returned by `yolo_main`.
        """
        # NOTE(review): the third argument and the `load_weights` call were
        # missing from the damaged source; reconstructed on the assumption
        # that `yolo_main` takes (inputs, anchors per scale, class count) --
        # confirm against imageai.Detection.YOLOv3.models.
        model = yolo_main(Input(shape=(None, None, 3)),
                          len(self._yolo_anchors) // 3,
                          len(self.numbers_to_names))
        model.load_weights(self._model_path)

        (self._yolo_boxes, self._yolo_scores, self._yolo_classes) = \
            yolo_eval(model.output, self._yolo_anchors, len(self.numbers_to_names),
                      self._yolo_input_image_shape, score_threshold=self._yolo_score,
                      iou_threshold=self._yolo_iou)
        return model

    @staticmethod
    def read_image_array(image_array):
        """Return a copy of `image_array` as uint8 with the last axis reversed.

        For a 3-channel image this swaps the channel order (e.g. RGB <-> BGR).
        """
        image = np.asarray(Image.fromarray(np.uint8(image_array)))
        return image[:, :, ::-1].copy()

    @staticmethod
    def letterbox_image(image, size):
        """Resize a PIL `image` to fit in `size`, keeping its aspect ratio.

        The resized image is centred on a grey (128, 128, 128) canvas of
        exactly `size` (width, height), which is returned.
        """
        iw, ih = image.size
        w, h = size
        scale = min(w/iw, h/ih)
        nw = int(iw*scale)
        nh = int(ih*scale)

        image = image.resize((nw, nh), Image.BICUBIC)
        new_image = Image.new('RGB', size, (128, 128, 128))
        new_image.paste(image, ((w-nw)//2, (h-nh)//2))
        return new_image

    @staticmethod
    def _clip_box(box, image_size):
        """Round a (top, left, bottom, right) box and clip it to the image.

        `image_size` is (width, height). Returns (left, top, right, bottom).
        """
        top, left, bottom, right = box
        width, height = image_size
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(height, np.floor(bottom + 0.5).astype('int32'))
        right = min(width, np.floor(right + 0.5).astype('int32'))
        return (left, top, right, bottom)

    def detect_objects(self, input_image,
                       minimum_percentage_probability=50,
                       custom_objects=("person", "car", "bicycle", "motorcycle"),
                       display_object_info=False):
        """Detect objects in `input_image` and draw their boxes.

        Parameters
        ----------
        input_image : array-like
            Image array; it is converted with `np.uint8` before use.
            # assumes (height, width, 3) -- TODO confirm with callers
        minimum_percentage_probability : number
            Detections scoring below this percentage are dropped.
        custom_objects : container of str or None
            Only labels contained in it are reported; `None` disables the
            label filter.
        display_object_info : bool
            If true, a "<label> <score>" caption is drawn next to each box.

        Returns
        -------
        tuple
            `(annotated_image, detections)`; each detection is a dict with
            the keys "name", "percentage_probability" and "box_points"
            (left, top, right, bottom).
        """
        output_objects_array = []

        # prepare the image
        image = Image.fromarray(np.uint8(input_image))
        input_image = self.read_image_array(input_image)
        # `read_image_array` reversed the channels; convert back for drawing
        detected_copy = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)

        # round the network input size down to a multiple of 32
        new_image_size = tuple(
            side - (side % 32) for side in self._yolo_model_image_size)
        boxed_image = self.letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype="float32")
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)

        model = self._model
        # NOTE(review): the `self.sess.run(..., feed_dict={...})` wrapper was
        # missing from the damaged source and has been reconstructed.
        out_boxes, out_scores, out_classes = self.sess.run(
            [self._yolo_boxes, self._yolo_scores, self._yolo_classes],
            feed_dict={
                model.input: image_data,
                self._yolo_input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0,
            })

        min_probability = minimum_percentage_probability / 100

        # walk the detections last-to-first, as the original loop did
        for class_id, box, score in zip(out_classes[::-1],
                                        out_boxes[::-1],
                                        out_scores[::-1]):
            predicted_class = self.numbers_to_names[class_id]

            # NOTE(review): both filters had lost their bodies; `continue`
            # is assumed.
            if score < min_probability:
                continue
            if custom_objects is not None and predicted_class not in custom_objects:
                continue

            label = "{} {:.2f}".format(predicted_class, score)
            detection_details = self._clip_box(box, image.size)

            # NOTE(review): reconstructed try/except -- fall back to red when
            # `label_color` cannot provide a colour for this class.
            try:
                color = label_color(class_id)
            except Exception:
                color = (255, 0, 0)

            draw_box(detected_copy, detection_details, color=color)
            if display_object_info:
                draw_caption(detected_copy, detection_details, label)

            # without this append the returned list was always empty
            output_objects_array.append({
                "name": predicted_class,
                "percentage_probability": score * 100,
                "box_points": detection_details,
            })

        return detected_copy, output_objects_array
