Apply a YOLO model to an image; refactor of https://github.com/OlafenwaMoses/ImageAI/blob/master/imageai/Detection/__init__.py
#
# Refactored from https://github.com/OlafenwaMoses/ImageAI/blob/master/imageai/Detection/__init__.py, for YOLOv3 only
# Credits to @OlafenwaMoses
#
# MIT License
#
# Van Hoey S.
import os

import cv2
import numpy as np
from PIL import Image

from keras import backend as K
from keras.layers import Input

# needs to be extracted
from imageai.Detection.YOLOv3.models import yolo_main
from imageai.Detection.YOLOv3.utils import yolo_eval
from imageai.Detection.keras_retinanet.utils.colors import label_color
from imageai.Detection.keras_retinanet.utils.visualization import draw_box, draw_caption
class YoloObjectDetection:
    """Detect objects in images with a pretrained YOLOv3 Keras model."""

    speed_options = ["normal", "fast", "faster", "fastest", "flash"]

    # COCO class index to human-readable name
    numbers_to_names = {
        0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train',
        7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter',
        13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow',
        20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag',
        27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite',
        34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket',
        39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl',
        46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot',
        52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant',
        59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote',
        66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink',
        72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear',
        78: 'hair drier', 79: 'toothbrush'}

    # post-processing thresholds and anchor boxes used by yolo_eval
    _yolo_iou = 0.45
    _yolo_score = 0.1
    _yolo_anchors = np.array([[10., 13.], [16., 30.], [33., 23.],
                              [30., 61.], [62., 45.], [59., 119.],
                              [116., 90.], [156., 198.], [373., 326.]])
    def __init__(self, path_model, detection_speed="normal"):
        if not os.path.isfile(path_model):
            raise FileNotFoundError(
                "No YOLOv3 model file found at {}".format(path_model))
        self._model_path = path_model

        # placeholders for config
        self._yolo_boxes, self._yolo_scores, self._yolo_classes = None, None, None
        self._yolo_input_image_shape = K.placeholder(shape=(2,))

        # default input size; the detection_speed setter overrides it and loads the model
        self._yolo_model_image_size = (416, 416)
        self.detection_speed = detection_speed

        # Keras session
        self.sess = K.get_session()
    @property
    def model_path(self):
        """Location of the YOLOv3 `.h5` model file."""
        return self._model_path

    @property
    def detection_speed(self):
        """Detection speed; one of `speed_options`.

        Faster settings use a smaller network input size, trading accuracy for speed.
        """
        return self._detection_speed

    @detection_speed.setter
    def detection_speed(self, speed):
        if speed not in self.speed_options:
            raise ValueError("Invalid speed parameter used. "
                             "Choose one of {}.".format(self.speed_options))
        self._detection_speed = speed

        if self._detection_speed == "normal":
            self._yolo_model_image_size = (416, 416)
        elif self._detection_speed == "fast":
            self._yolo_model_image_size = (320, 320)
        elif self._detection_speed == "faster":
            self._yolo_model_image_size = (208, 208)
        elif self._detection_speed == "fastest":
            self._yolo_model_image_size = (128, 128)
        elif self._detection_speed == "flash":
            self._yolo_model_image_size = (96, 96)

        self._model = self.prepare_model(self._detection_speed)
    def prepare_model(self, detection_speed="normal"):
        """Build the YOLOv3 graph, load the weights and set up the evaluation tensors."""
        model = yolo_main(Input(shape=(None, None, 3)),
                          len(self._yolo_anchors) // 3,
                          len(self.numbers_to_names))
        model.load_weights(self.model_path)

        (self._yolo_boxes, self._yolo_scores, self._yolo_classes) = \
            yolo_eval(model.output, self._yolo_anchors, len(self.numbers_to_names),
                      self._yolo_input_image_shape, score_threshold=self._yolo_score,
                      iou_threshold=self._yolo_iou)
        return model
    @staticmethod
    def read_image_array(image_array):
        """Convert an RGB image array to a BGR (OpenCV-style) copy."""
        image = np.asarray(Image.fromarray(np.uint8(image_array)))
        return image[:, :, ::-1].copy()

    @staticmethod
    def letterbox_image(image, size):
        """Resize a PIL image to `size`, preserving the aspect ratio with grey padding."""
        iw, ih = image.size
        w, h = size
        scale = min(w / iw, h / ih)
        nw = int(iw * scale)
        nh = int(ih * scale)

        image = image.resize((nw, nh), Image.BICUBIC)
        new_image = Image.new('RGB', size, (128, 128, 128))
        new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
        return new_image
    def detect_objects(self, input_image,
                       minimum_percentage_probability=50,
                       custom_objects=("person", "car", "bicycle", "motorcycle"),
                       display_object_info=False):
        """Detect objects in an RGB image array.

        Returns the annotated image (RGB array) and a list of dicts with the
        name, percentage probability and box points of each detected object.
        """
        output_objects_array = []

        # prepare the image
        image = Image.fromarray(np.uint8(input_image))
        input_image = self.read_image_array(input_image)

        detected_copy = input_image
        detected_copy = cv2.cvtColor(detected_copy, cv2.COLOR_BGR2RGB)

        # letterbox to the (multiple-of-32) model input size and scale to [0, 1]
        new_image_size = (self._yolo_model_image_size[0] - (self._yolo_model_image_size[0] % 32),
                          self._yolo_model_image_size[1] - (self._yolo_model_image_size[1] % 32))
        boxed_image = self.letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype="float32")
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)

        # run the YOLO evaluation tensors on the prepared image
        model = self._model
        out_boxes, out_scores, out_classes = self.sess.run(
            [self._yolo_boxes, self._yolo_scores, self._yolo_classes],
            feed_dict={
                model.input: image_data,
                self._yolo_input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        min_probability = minimum_percentage_probability / 100
        counting = 0

        for a, b in reversed(list(enumerate(out_classes))):
            predicted_class = self.numbers_to_names[b]
            box = out_boxes[a]
            score = out_scores[a]

            # skip low-confidence detections and classes that were not requested
            if score < min_probability:
                continue
            if predicted_class not in custom_objects:
                continue

            counting += 1
            label = "{} {:.2f}".format(predicted_class, score)

            # clip the box to the image boundaries
            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            try:
                color = label_color(b)
            except Exception:
                color = (255, 0, 0)

            detection_details = (left, top, right, bottom)
            draw_box(detected_copy, detection_details, color=color)

            if display_object_info:
                draw_caption(detected_copy, detection_details, label)

            each_object_details = {}
            each_object_details["name"] = predicted_class
            each_object_details["percentage_probability"] = score * 100
            each_object_details["box_points"] = detection_details
            output_objects_array.append(each_object_details)

        return detected_copy, output_objects_array
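

# Minimal usage sketch, assuming a local copy of pretrained YOLOv3 weights and an
# example image; "yolo.h5" and "street.jpg" are placeholder file names, and the
# threshold/class choices below are illustrative, not part of the original gist.
if __name__ == "__main__":
    detector = YoloObjectDetection("yolo.h5", detection_speed="fast")

    # the detector expects an RGB numpy array
    frame = np.asarray(Image.open("street.jpg").convert("RGB"))
    annotated, detections = detector.detect_objects(
        frame,
        minimum_percentage_probability=60,
        custom_objects=["person", "car"],
        display_object_info=True)

    for det in detections:
        print("{name}: {percentage_probability:.1f}% at {box_points}".format(**det))

    # annotated is an RGB array with boxes drawn on it
    Image.fromarray(annotated).save("street_annotated.jpg")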