Skip to content

Instantly share code, notes, and snippets.

@anoken
Last active January 27, 2020 22:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anoken/7ce94342005b0ec3da87b236d591c744 to your computer and use it in GitHub Desktop.
Save anoken/7ce94342005b0ec3da87b236d591c744 to your computer and use it in GitHub Desktop.
YOLO_keras_train
#! /usr/bin/env python
#from __future__ import print_function
#
#https://mlblr.com/includes/mlai/index.html#yolov2
#https://github.com/snakers4/yolov2-fish
from keras.models import Sequential, Model
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.optimizers import SGD, Adam, RMSprop
from keras.layers.merge import concatenate
import matplotlib.pyplot as plt
import keras.backend as K
import tensorflow as tf
import imgaug as ia
from tqdm import tqdm
from imgaug import augmenters as iaa
import numpy as np
import pickle
import os, cv2
from preprocessing import parse_annotation, BatchGenerator
from keras.applications import MobileNet
LABELS = ['raccoon']
IMAGE_H, IMAGE_W = 224, 224
GRID_H, GRID_W = 7 , 7
BOX = 5
CLASS = len(LABELS)
CLASS_WEIGHTS = np.ones(CLASS, dtype='float32')
OBJ_THRESHOLD = 0.3
NMS_THRESHOLD = 0.3
ANCHORS = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]
NO_OBJECT_SCALE = 1.0
OBJECT_SCALE = 5.0
COORD_SCALE = 1.0
CLASS_SCALE = 1.0
BATCH_SIZE = 1
WARM_UP_BATCHES = 0
TRUE_BOX_BUFFER = 50
wt_path = 'yolo.weights'
train_image_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/images/'
train_annot_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/annotations/'
valid_image_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/images/'
valid_annot_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/annotations/'
input_image = Input(shape=(IMAGE_H, IMAGE_W, 3))
true_boxes = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER , 4))
mobilenet = MobileNet(input_shape=(IMAGE_H, IMAGE_W, 3),alpha = 0.75,depth_multiplier = 1, dropout = 0.001,
weights = "imagenet", classes = 1000, include_top=False,
#backend=keras.backend, layers=keras.layers,models=keras.models,utils=keras.utils
)
x = mobilenet(input_image)
# Layer 23
x = Conv2D(BOX * (4 + 1 + CLASS), (1,1), strides=(1,1), padding='same', name='conv_23')(x)
output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(x)
# small hack to allow true_boxes to be registered when Keras build the model
# for more information: https://github.com/fchollet/keras/issues/2790
output = Lambda(lambda args: args[0])([output, true_boxes])
model = Model([input_image, true_boxes], output)
model.summary()
def create_cell_grid(grid_size, batch_size):
x_pos = tf.to_float(tf.range(grid_size))
y_pos = tf.to_float(tf.range(grid_size))
xx, yy = tf.meshgrid(x_pos, y_pos)
xx = tf.expand_dims(xx, -1)
yy = tf.expand_dims(yy, -1)
grid = tf.concat([xx, yy], axis=-1) # (7, 7, 2)
grid = tf.expand_dims(grid, -2) # (7, 7, 1, 2)
grid = tf.tile(grid, (1,1,5,1)) # (7, 7, 5, 2)
grid = tf.expand_dims(grid, 0) # (1, 7, 7, 1, 2)
grid = tf.tile(grid, (batch_size,1,1,1,1)) # (N, 7, 7, 1, 2)
return grid
def custom_loss(y_true, y_pred):
mask_shape = tf.shape(y_true)[:4]
cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)))
cell_y = tf.transpose(cell_x, (0,2,1,3,4))
cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [BATCH_SIZE, 1, 1, 5, 1])
coord_mask = tf.zeros(mask_shape)
conf_mask = tf.zeros(mask_shape)
class_mask = tf.zeros(mask_shape)
seen = tf.Variable(0.)
total_recall = tf.Variable(0.)
"""
Adjust prediction
"""
### adjust x and y
pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
### adjust w and h
pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(ANCHORS, [1,1,1,BOX,2])
### adjust confidence
pred_box_conf = tf.sigmoid(y_pred[..., 4])
### adjust class probabilities
pred_box_class = y_pred[..., 5:]
"""
Adjust ground truth
"""
### adjust x and y
true_box_xy = y_true[..., 0:2] # relative position to the containing cell
### adjust w and h
true_box_wh = y_true[..., 2:4] # number of cells accross, horizontally and vertically
### adjust confidence
true_wh_half = true_box_wh / 2.
true_mins = true_box_xy - true_wh_half
true_maxes = true_box_xy + true_wh_half
pred_wh_half = pred_box_wh / 2.
pred_mins = pred_box_xy - pred_wh_half
pred_maxes = pred_box_xy + pred_wh_half
intersect_mins = tf.maximum(pred_mins, true_mins)
intersect_maxes = tf.minimum(pred_maxes, true_maxes)
intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]
union_areas = pred_areas + true_areas - intersect_areas
iou_scores = tf.truediv(intersect_areas, union_areas)
true_box_conf = iou_scores * y_true[..., 4]
### adjust class probabilities
true_box_class = tf.argmax(y_true[..., 5:], -1)
"""
Determine the masks
"""
### coordinate mask: simply the position of the ground truth boxes (the predictors)
coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * COORD_SCALE
### confidence mask: penelize predictors + penalize boxes with low IOU
# penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
true_xy = true_boxes[..., 0:2]
true_wh = true_boxes[..., 2:4]
true_wh_half = true_wh / 2.
true_mins = true_xy - true_wh_half
true_maxes = true_xy + true_wh_half
pred_xy = tf.expand_dims(pred_box_xy, 4)
pred_wh = tf.expand_dims(pred_box_wh, 4)
pred_wh_half = pred_wh / 2.
pred_mins = pred_xy - pred_wh_half
pred_maxes = pred_xy + pred_wh_half
intersect_mins = tf.maximum(pred_mins, true_mins)
intersect_maxes = tf.minimum(pred_maxes, true_maxes)
intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
true_areas = true_wh[..., 0] * true_wh[..., 1]
pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
union_areas = pred_areas + true_areas - intersect_areas
iou_scores = tf.truediv(intersect_areas, union_areas)
best_ious = tf.reduce_max(iou_scores, axis=4)
conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4]) * NO_OBJECT_SCALE
# penalize the confidence of the boxes, which are reponsible for corresponding ground truth box
conf_mask = conf_mask + y_true[..., 4] * OBJECT_SCALE
### class mask: simply the position of the ground truth boxes (the predictors)
class_mask = y_true[..., 4] * tf.gather(CLASS_WEIGHTS, true_box_class) * CLASS_SCALE
"""
Warm-up training
"""
no_boxes_mask = tf.to_float(coord_mask < COORD_SCALE/2.)
seen = tf.assign_add(seen, 1.)
true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, WARM_UP_BATCHES),
lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask,
true_box_wh + tf.ones_like(true_box_wh) * np.reshape(ANCHORS, [1,1,1,BOX,2]) * no_boxes_mask,
tf.ones_like(coord_mask)],
lambda: [true_box_xy,
true_box_wh,
coord_mask])
"""
Finalize the loss
"""
nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
nb_conf_box = tf.reduce_sum(tf.to_float(conf_mask > 0.0))
nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))
loss_xy = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy) * coord_mask) / (nb_coord_box + 1e-6) / 2.
loss_wh = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh) * coord_mask) / (nb_coord_box + 1e-6) / 2.
loss_conf = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask) / (nb_conf_box + 1e-6) / 2.
loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)
loss = loss_xy + loss_wh + loss_conf + loss_class
"""
Debugging code
"""
nb_true_box = tf.reduce_sum(y_true[..., 4])
nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3))
current_recall = nb_pred_box/(nb_true_box + 1e-6)
total_recall = tf.assign_add(total_recall, current_recall)
# loss = tf.Print(loss, [tf.zeros((1))], message='Dummy Line \t', summarize=1000)
# loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
# loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
# loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
# loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
# loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
# loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
# loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000)
return loss
generator_config = {
'IMAGE_H' : IMAGE_H,
'IMAGE_W' : IMAGE_W,
'GRID_H' : GRID_H,
'GRID_W' : GRID_W,
'BOX' : BOX,
'LABELS' : LABELS,
'CLASS' : len(LABELS),
'ANCHORS' : ANCHORS,
'BATCH_SIZE' : BATCH_SIZE,
'TRUE_BOX_BUFFER' : 50,
}
train_imgs, seen_train_labels = parse_annotation(train_annot_folder, train_image_folder, labels=LABELS)
def normalize(image):
image = image / 255.
return image
### read saved pickle of parsed annotations
#with open ('train_imgs', 'rb') as fp:
# train_imgs = pickle.load(fp)
train_batch = BatchGenerator(train_imgs, generator_config, norm=normalize)
valid_imgs, seen_valid_labels = parse_annotation(valid_annot_folder, valid_image_folder, labels=LABELS)
### write parsed annotations to pickle for fast retrieval next time
#with open('valid_imgs', 'wb') as fp:
# pickle.dump(valid_imgs, fp)
### read saved pickle of parsed annotations
#with open ('valid_imgs', 'rb') as fp:
# valid_imgs = pickle.load(fp)
valid_batch = BatchGenerator(valid_imgs, generator_config, norm=normalize, jitter=False)
optimizer = Adam(lr=0.5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
#optimizer = SGD(lr=1e-4, decay=0.0005, momentum=0.9)
#optimizer = RMSprop(lr=1e-4, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(loss=custom_loss, optimizer=optimizer)
history=model.fit_generator(generator = train_batch,
steps_per_epoch = len(train_batch),
epochs = 30,
verbose = 1,
# max_queue_size = 3
)
model.save('my_yolo.h5')
np.savetxt("model_top_loss.csv", history.history['loss'])
#np.savetxt("model_top_val_loss.csv", history.history['val_loss'])
#np.savetxt("model_top_acc.csv", history.history['acc'])
#np.savetxt("model_top_val_acc.csv", history.history['val_acc'])
#! /usr/bin/env python
#from __future__ import print_function
from keras.models import Sequential, Model
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.optimizers import SGD, Adam, RMSprop
from keras.layers.merge import concatenate
import matplotlib.pyplot as plt
import keras.backend as K
import tensorflow as tf
import imgaug as ia
from tqdm import tqdm
from imgaug import augmenters as iaa
import numpy as np
import pickle
import os, cv2
from preprocessing import parse_annotation, BatchGenerator
from keras.applications import MobileNet
LABELS = ['raccoon']
IMAGE_H, IMAGE_W = 224, 224
GRID_H, GRID_W = 7 , 7
BOX = 5
CLASS = len(LABELS)
CLASS_WEIGHTS = np.ones(CLASS, dtype='float32')
OBJ_THRESHOLD = 0.3
NMS_THRESHOLD = 0.3
ANCHORS = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]
NO_OBJECT_SCALE = 1.0
OBJECT_SCALE = 5.0
COORD_SCALE = 1.0
CLASS_SCALE = 1.0
BATCH_SIZE = 1
WARM_UP_BATCHES = 0
TRUE_BOX_BUFFER = 50
wt_path = 'yolo.weights'
train_image_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/images/'
train_annot_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/annotations/'
valid_image_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/images/'
valid_annot_folder = '/mnt/e/LinuxHome/K210/DATASET/yolo_train/yolo_train_1/raccoon_dataset/annotations/'
input_image = Input(shape=(IMAGE_H, IMAGE_W, 3))
true_boxes = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER , 4))
mobilenet = MobileNet(input_shape=(IMAGE_H, IMAGE_W, 3),alpha = 0.75,depth_multiplier = 1, dropout = 0.001,
weights = "imagenet", classes = 1000, include_top=False,
#backend=keras.backend, layers=keras.layers,models=keras.models,utils=keras.utils
)
x = mobilenet(input_image)
# Layer 23
x = Conv2D(BOX * (4 + 1 + CLASS), (1,1), strides=(1,1), padding='same', name='conv_23')(x)
output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(x)
# small hack to allow true_boxes to be registered when Keras build the model
# for more information: https://github.com/fchollet/keras/issues/2790
#output = Lambda(lambda args: args[0])([output, true_boxes])
model = Model([input_image, true_boxes], output)
model.summary()
def create_cell_grid(grid_size, batch_size):
x_pos = tf.to_float(tf.range(grid_size))
y_pos = tf.to_float(tf.range(grid_size))
xx, yy = tf.meshgrid(x_pos, y_pos)
xx = tf.expand_dims(xx, -1)
yy = tf.expand_dims(yy, -1)
grid = tf.concat([xx, yy], axis=-1) # (7, 7, 2)
grid = tf.expand_dims(grid, -2) # (7, 7, 1, 2)
grid = tf.tile(grid, (1,1,5,1)) # (7, 7, 5, 2)
grid = tf.expand_dims(grid, 0) # (1, 7, 7, 1, 2)
grid = tf.tile(grid, (batch_size,1,1,1,1)) # (N, 7, 7, 1, 2)
return grid
def custom_loss(y_true, y_pred):
"""
# Args
y_pred : (N, 13, 13, 5, 6)
cell_grid : (N, 13, 13, 5, 2)
# Returns
box_xy : (N, 13, 13, 5, 2)
1) sigmoid activation
2) grid offset added
box_wh : (N, 13, 13, 5, 2)
1) exponential activation
2) anchor box multiplied
box_conf : (N, 13, 13, 5, 1)
1) sigmoid activation
box_classes : (N, 13, 13, 5, nb_class)
"""
# bx = sigmoid(tx) + cx, by = sigmoid(ty) + cy
batch_size = tf.shape(y_pred)[0]
grid_size = tf.shape(y_pred)[1]
cell_grid = create_cell_grid(grid_size, batch_size)
pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
pred_box_wh = tf.exp(y_pred[..., 2:4]) * BOX
pred_box_conf = tf.sigmoid(y_pred[..., 4])
pred_box_class = y_pred[..., 5:]
### adjust x and y
true_box_xy = y_true[..., 0:2] # relative position to the containing cell
### adjust w and h
true_box_wh = y_true[..., 2:4] # number of cells accross, horizontally and vertically
### adjust confidence
true_wh_half = true_box_wh / 2.
true_mins = true_box_xy - true_wh_half
true_maxes = true_box_xy + true_wh_half
pred_wh_half = pred_box_wh / 2.
pred_mins = pred_box_xy - pred_wh_half
pred_maxes = pred_box_xy + pred_wh_half
intersect_mins = tf.maximum(pred_mins, true_mins)
intersect_maxes = tf.minimum(pred_maxes, true_maxes)
intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]
union_areas = pred_areas + true_areas - intersect_areas
iou_scores = tf.truediv(intersect_areas, union_areas)
true_box_conf = iou_scores * y_true[..., 4]
### adjust class probabilities
true_box_class = tf.argmax(y_true[..., 5:], -1)
# concatenate pred tensor
pred_box_conf = tf.expand_dims(pred_box_conf, -1)
y_pred_activated = tf.concat([pred_box_xy, pred_box_wh, pred_box_conf, pred_box_class], axis=-1)
# concatenate true tensor
true_box_conf = tf.expand_dims(true_box_conf, -1)
true_box_class = tf.expand_dims(true_box_class, -1)
true_box_class = tf.cast(true_box_class, true_box_xy.dtype)
y_true_activated = tf.concat([true_box_xy, true_box_wh, true_box_conf, true_box_class], axis=-1)
pred_box_xy, pred_box_wh = pred_tensor[..., :2], pred_tensor[..., 2:4]
true_boxes = y_true[..., :4]
true_boxes = tf.reshape(true_boxes, [batch_size, -1, 4])
true_boxes = tf.expand_dims(true_boxes, 1)
true_boxes = tf.expand_dims(true_boxes, 1)
true_boxes = tf.expand_dims(true_boxes, 1)
true_xy = true_boxes[..., 0:2]
true_wh = true_boxes[..., 2:4]
true_wh_half = true_wh / 2.
true_mins = true_xy - true_wh_half
true_maxes = true_xy + true_wh_half
pred_xy = tf.expand_dims(pred_box_xy, 4)
pred_wh = tf.expand_dims(pred_box_wh, 4)
pred_wh_half = pred_wh / 2.
pred_mins = pred_xy - pred_wh_half
pred_maxes = pred_xy + pred_wh_half
intersect_mins = tf.maximum(pred_mins, true_mins)
intersect_maxes = tf.minimum(pred_maxes, true_maxes)
intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
true_areas = true_wh[..., 0] * true_wh[..., 1]
pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
union_areas = pred_areas + true_areas - intersect_areas
iou_scores = tf.truediv(intersect_areas, union_areas)
best_ious = tf.reduce_max(iou_scores, axis=4)
# 1) confidence mask (N, 13, 13, 5)
conf_mask = tf.zeros(tf.shape(y_true)[:4])
conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4]) * NO_OBJECT_SCALE
# penalize the confidence of the boxes, which are reponsible for corresponding ground truth box
conf_mask = conf_mask + y_true[..., 4] * OBJECT_SCALE
class_wt = np.ones(CLASS, dtype='float32')
class_mask = y_true[..., 4] * tf.gather(class_wt, true_box_class) *CLASS_SCALE
coord_mask = tf.expand_dims(y_true[..., BOX_IDX_CONFIDENCE], axis=-1) *COORD_SCALE
nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
nb_conf_box = tf.reduce_sum(tf.to_float(conf_mask > 0.0))
nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))
pred_box_xy, pred_box_wh, pred_box_conf, pred_box_class = pred_tensor[..., :2], pred_tensor[..., 2:4], pred_tensor[..., 4], pred_tensor[..., 5:]
# true_box_xy, true_box_wh, true_box_conf, true_box_class = true_tensor[..., :2], true_tensor[..., 2:4], true_tensor[..., 4], true_tensor[..., 5]
true_box_class = tf.cast(true_box_class, tf.int64)
loss_xy = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy) * coord_mask) / (nb_coord_box + 1e-6) / 2.
loss_wh = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh) * coord_mask) / (nb_coord_box + 1e-6) / 2.
loss_conf = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask) / (nb_conf_box + 1e-6) / 2.
loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)
loss = loss_xy + loss_wh + loss_conf + loss_class
return loss
generator_config = {
'IMAGE_H' : IMAGE_H,
'IMAGE_W' : IMAGE_W,
'GRID_H' : GRID_H,
'GRID_W' : GRID_W,
'BOX' : BOX,
'LABELS' : LABELS,
'CLASS' : len(LABELS),
'ANCHORS' : ANCHORS,
'BATCH_SIZE' : BATCH_SIZE,
'TRUE_BOX_BUFFER' : 50,
}
train_imgs, seen_train_labels = parse_annotation(train_annot_folder, train_image_folder, labels=LABELS)
def normalize(image):
image = image / 255.
return image
### read saved pickle of parsed annotations
#with open ('train_imgs', 'rb') as fp:
# train_imgs = pickle.load(fp)
train_batch = BatchGenerator(train_imgs, generator_config, norm=normalize)
valid_imgs, seen_valid_labels = parse_annotation(valid_annot_folder, valid_image_folder, labels=LABELS)
### write parsed annotations to pickle for fast retrieval next time
#with open('valid_imgs', 'wb') as fp:
# pickle.dump(valid_imgs, fp)
### read saved pickle of parsed annotations
#with open ('valid_imgs', 'rb') as fp:
# valid_imgs = pickle.load(fp)
valid_batch = BatchGenerator(valid_imgs, generator_config, norm=normalize, jitter=False)
optimizer = Adam(lr=0.5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
#optimizer = SGD(lr=1e-4, decay=0.0005, momentum=0.9)
#optimizer = RMSprop(lr=1e-4, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(loss=custom_loss, optimizer=optimizer)
history=model.fit_generator(generator = train_batch,
steps_per_epoch = len(train_batch),
epochs = 30,
verbose = 1,
max_queue_size = 3)
model.save('my_yolo.h5')
np.savetxt("model_top_loss.csv", history.history['loss'])
#np.savetxt("model_top_val_loss.csv", history.history['val_loss'])
#np.savetxt("model_top_acc.csv", history.history['acc'])
#np.savetxt("model_top_val_acc.csv", history.history['val_acc'])
#! /usr/bin/env python
#from __future__ import print_function
from keras.models import Sequential, Model
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.optimizers import SGD, Adam, RMSprop
from keras.layers.merge import concatenate
import matplotlib.pyplot as plt
import keras.backend as K
import tensorflow as tf
import imgaug as ia
from tqdm import tqdm
from imgaug import augmenters as iaa
import numpy as np
import pickle
import os, cv2
from preprocessing import parse_annotation, BatchGenerator
from keras.applications import MobileNet
LABELS = ['raccoon']
IMAGE_H, IMAGE_W = 224, 224
GRID_H, GRID_W = 7 , 7
#IMAGE_H, IMAGE_W = 416, 416
#GRID_H, GRID_W = 13 , 13
BOX = 5
CLASS = len(LABELS)
CLASS_WEIGHTS = np.ones(CLASS, dtype='float32')
OBJ_THRESHOLD = 0.3#0.5
NMS_THRESHOLD = 0.3#0.45
ANCHORS = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]
NO_OBJECT_SCALE = 1.0
OBJECT_SCALE = 5.0
COORD_SCALE = 1.0
CLASS_SCALE = 1.0
BATCH_SIZE = 16
WARM_UP_BATCHES = 0
TRUE_BOX_BUFFER = 50
wt_path = 'yolo.weights'
train_image_folder = '/root/k210/DATASET/raccoon_dataset_yolo/raccoon_dataset/images/'
train_annot_folder = '/root/k210/DATASET/raccoon_dataset_yolo/raccoon_dataset/annotations/'
valid_image_folder = '/root/k210/DATASET/raccoon_dataset_yolo/raccoon_dataset/images/'
valid_annot_folder = '/root/k210/DATASET/raccoon_dataset_yolo/raccoon_dataset/annotations/'
input_image = Input(shape=(IMAGE_H, IMAGE_W, 3))
true_boxes = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER , 4))
mobilenet = MobileNet(input_shape=(IMAGE_H, IMAGE_W, 3),alpha = 0.75,depth_multiplier = 1, dropout = 0.001,
weights = "imagenet", classes = 1000, include_top=False,
#backend=keras.backend, layers=keras.layers,models=keras.models,utils=keras.utils
)
x = mobilenet(input_image)
# Layer 23
x = Conv2D(BOX * (4 + 1 + CLASS), (1,1), strides=(1,1), padding='same', name='conv_23')(x)
output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(x)
# small hack to allow true_boxes to be registered when Keras build the model
# for more information: https://github.com/fchollet/keras/issues/2790
#output = Lambda(lambda args: args[0])([output, true_boxes])
model = Model([input_image, true_boxes], output)
model.summary()
def custom_loss(y_true, y_pred):
mask_shape = tf.shape(y_true)[:4]
cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)))
cell_y = tf.transpose(cell_x, (0,2,1,3,4))
cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [BATCH_SIZE, 1, 1, 5, 1])
coord_mask = tf.zeros(mask_shape)
conf_mask = tf.zeros(mask_shape)
class_mask = tf.zeros(mask_shape)
seen = tf.Variable(0.)
total_recall = tf.Variable(0.)
"""
Adjust prediction
"""
### adjust x and y
pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
### adjust w and h
pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(ANCHORS, [1,1,1,BOX,2])
### adjust confidence
pred_box_conf = tf.sigmoid(y_pred[..., 4])
### adjust class probabilities
pred_box_class = y_pred[..., 5:]
"""
Adjust ground truth
"""
### adjust x and y
true_box_xy = y_true[..., 0:2] # relative position to the containing cell
### adjust w and h
true_box_wh = y_true[..., 2:4] # number of cells accross, horizontally and vertically
### adjust confidence
true_wh_half = true_box_wh / 2.
true_mins = true_box_xy - true_wh_half
true_maxes = true_box_xy + true_wh_half
pred_wh_half = pred_box_wh / 2.
pred_mins = pred_box_xy - pred_wh_half
pred_maxes = pred_box_xy + pred_wh_half
intersect_mins = tf.maximum(pred_mins, true_mins)
intersect_maxes = tf.minimum(pred_maxes, true_maxes)
intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]
union_areas = pred_areas + true_areas - intersect_areas
iou_scores = tf.truediv(intersect_areas, union_areas)
true_box_conf = iou_scores * y_true[..., 4]
### adjust class probabilities
true_box_class = tf.argmax(y_true[..., 5:], -1)
"""
Determine the masks
"""
### coordinate mask: simply the position of the ground truth boxes (the predictors)
coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * COORD_SCALE
### confidence mask: penelize predictors + penalize boxes with low IOU
# penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
true_xy = true_boxes[..., 0:2]
true_wh = true_boxes[..., 2:4]
true_wh_half = true_wh / 2.
true_mins = true_xy - true_wh_half
true_maxes = true_xy + true_wh_half
pred_xy = tf.expand_dims(pred_box_xy, 4)
pred_wh = tf.expand_dims(pred_box_wh, 4)
pred_wh_half = pred_wh / 2.
pred_mins = pred_xy - pred_wh_half
pred_maxes = pred_xy + pred_wh_half
intersect_mins = tf.maximum(pred_mins, true_mins)
intersect_maxes = tf.minimum(pred_maxes, true_maxes)
intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
true_areas = true_wh[..., 0] * true_wh[..., 1]
pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
union_areas = pred_areas + true_areas - intersect_areas
iou_scores = tf.truediv(intersect_areas, union_areas)
best_ious = tf.reduce_max(iou_scores, axis=4)
conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4]) * NO_OBJECT_SCALE
# penalize the confidence of the boxes, which are reponsible for corresponding ground truth box
conf_mask = conf_mask + y_true[..., 4] * OBJECT_SCALE
### class mask: simply the position of the ground truth boxes (the predictors)
class_mask = y_true[..., 4] * tf.gather(CLASS_WEIGHTS, true_box_class) * CLASS_SCALE
"""
Warm-up training
"""
no_boxes_mask = tf.to_float(coord_mask < COORD_SCALE/2.)
seen = tf.assign_add(seen, 1.)
true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, WARM_UP_BATCHES),
lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask,
true_box_wh + tf.ones_like(true_box_wh) * np.reshape(ANCHORS, [1,1,1,BOX,2]) * no_boxes_mask,
tf.ones_like(coord_mask)],
lambda: [true_box_xy,
true_box_wh,
coord_mask])
"""
Finalize the loss
"""
nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
nb_conf_box = tf.reduce_sum(tf.to_float(conf_mask > 0.0))
nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))
loss_xy = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy) * coord_mask) / (nb_coord_box + 1e-6) / 2.
loss_wh = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh) * coord_mask) / (nb_coord_box + 1e-6) / 2.
loss_conf = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask) / (nb_conf_box + 1e-6) / 2.
loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)
loss = loss_xy + loss_wh + loss_conf + loss_class
"""
Debugging code
"""
nb_true_box = tf.reduce_sum(y_true[..., 4])
nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3))
current_recall = nb_pred_box/(nb_true_box + 1e-6)
total_recall = tf.assign_add(total_recall, current_recall)
# loss = tf.Print(loss, [tf.zeros((1))], message='Dummy Line \t', summarize=1000)
# loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
# loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
# loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
# loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
# loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
# loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
# loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000)
return loss
generator_config = {
'IMAGE_H' : IMAGE_H,
'IMAGE_W' : IMAGE_W,
'GRID_H' : GRID_H,
'GRID_W' : GRID_W,
'BOX' : BOX,
'LABELS' : LABELS,
'CLASS' : len(LABELS),
'ANCHORS' : ANCHORS,
'BATCH_SIZE' : BATCH_SIZE,
'TRUE_BOX_BUFFER' : 50,
}
train_imgs, seen_train_labels = parse_annotation(train_annot_folder, train_image_folder, labels=LABELS)
def normalize(image):
image = image / 255.
return image
### read saved pickle of parsed annotations
#with open ('train_imgs', 'rb') as fp:
# train_imgs = pickle.load(fp)
train_batch = BatchGenerator(train_imgs, generator_config, norm=normalize)
valid_imgs, seen_valid_labels = parse_annotation(valid_annot_folder, valid_image_folder, labels=LABELS)
### write parsed annotations to pickle for fast retrieval next time
#with open('valid_imgs', 'wb') as fp:
# pickle.dump(valid_imgs, fp)
### read saved pickle of parsed annotations
#with open ('valid_imgs', 'rb') as fp:
# valid_imgs = pickle.load(fp)
valid_batch = BatchGenerator(valid_imgs, generator_config, norm=normalize, jitter=False)
optimizer = Adam(lr=0.5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
#optimizer = SGD(lr=1e-4, decay=0.0005, momentum=0.9)
#optimizer = RMSprop(lr=1e-4, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(loss=custom_loss, optimizer=optimizer)
history=model.fit_generator(generator = train_batch,
steps_per_epoch = len(train_batch),
epochs = 3,
verbose = 0,
max_queue_size = 3)
history.save('my_yolo.h5')
np.savetxt("model_top_loss.csv", history.history['loss'])
#np.savetxt("model_top_val_loss.csv", history.history['val_loss'])
#np.savetxt("model_top_acc.csv", history.history['acc'])
#np.savetxt("model_top_val_acc.csv", history.history['val_acc'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment