#pylint: disable=missing-module-docstring, missing-function-docstring, missing-class-docstring, too-many-locals, too-many-statements
from __future__ import division
import torch
import numpy as np
import cv2
def unique(tensor):
    tensor_np = tensor.cpu().numpy()
    unique_np = np.unique(tensor_np)
    unique_tensor = torch.from_numpy(unique_np)
    tensor_res = tensor.new(unique_tensor.shape)
    tensor_res.copy_(unique_tensor)
    return tensor_res

def bbox_iou(box1, box2):
    """
    Returns the IoU of two bounding boxes
    """
    # Get the coordinates of the bounding boxes
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
    # Get the coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)
    # Intersection area
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) \
        * torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0)
    # Union area
    b1_area = (b1_x2 - b1_x1 + 1)*(b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1)*(b2_y2 - b2_y1 + 1)
    iou = inter_area / (b1_area + b2_area - inter_area)
    return iou

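# Minimal sanity check for bbox_iou (illustrative sketch, not part of the
# original gist; the box values are made up). With the +1 pixel convention
# above, two 11x11 boxes offset by 2 pixels overlap in a 9x9 region, so the
# IoU is 81 / (121 + 121 - 81) ~= 0.503:
#
#     box_a = torch.tensor([[0., 0., 10., 10.]])
#     box_b = torch.tensor([[2., 2., 12., 12.]])
#     bbox_iou(box_a, box_b)  # -> tensor([0.5031])
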
#@torch.jit.script
def predict_transform(prediction, inp_dim, anchors, num_classes, cuda=True):
    batch_size = prediction.size(0)
    stride = inp_dim // prediction.size(2)
    grid_size = inp_dim // stride
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)
    prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
    prediction = prediction.transpose(1, 2).contiguous()
    prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
    # Scale the anchors down to the units of the feature-map grid
    anchors = [(float(anchor[0]) / stride, float(anchor[1]) / stride) for anchor in anchors]
    # Sigmoid the centre_X, centre_Y and object confidence. torch.cat is used
    # instead of in-place assignment so the function stays scriptable/traceable.
    prediction = torch.cat((torch.sigmoid(prediction[:, :, 0:2]),
                            prediction[:, :, 2:4],
                            torch.sigmoid(prediction[:, :, 4:5]),
                            prediction[:, :, 5:]), dim=2)
    # Add the center offsets. torch.meshgrid uses "ij" indexing, so the first
    # output indexes rows (y) and the second indexes columns (x).
    grid = torch.arange(grid_size)
    y_grid, x_grid = torch.meshgrid(grid, grid)
    x_offset = x_grid.type(torch.FloatTensor).view(-1, 1)
    y_offset = y_grid.type(torch.FloatTensor).view(-1, 1)
    if cuda:
        x_offset = x_offset.cuda()
        y_offset = y_offset.cuda()
    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1, num_anchors).view(-1, 2).unsqueeze(0)
    prediction = torch.cat((prediction[:, :, :2] + x_y_offset,
                            prediction[:, :, 2:]), dim=2)
    # Log-space transform of the height and the width
    anchors = torch.FloatTensor(anchors)
    if cuda:
        anchors = anchors.cuda()
    anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
    prediction = torch.cat((prediction[:, :, :2],
                            torch.exp(prediction[:, :, 2:4]) * anchors,
                            prediction[:, :, 4:]), dim=2)
    # Sigmoid the class scores
    prediction = torch.cat((prediction[:, :, :5],
                            torch.sigmoid(prediction[:, :, 5:5+num_classes])), dim=2)
    # Rescale the box coordinates back to input-image pixels
    prediction = torch.cat((prediction[:, :, :4]*stride,
                            prediction[:, :, 4:]), dim=2)
    return prediction

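# Shape sketch for predict_transform (assumed values for illustration; the
# anchors are the stock YOLOv3 large-scale anchors): for a 416x416 input, a
# 13x13 feature map, 3 anchors and 80 classes, the raw head has shape
# (N, 3*(5+80), 13, 13) and the function returns (N, 13*13*3, 85), with boxes
# already in input-image coordinates:
#
#     raw = torch.randn(1, 255, 13, 13)
#     out = predict_transform(raw, 416, [(116, 90), (156, 198), (373, 326)],
#                             80, cuda=False)
#     out.shape  # -> torch.Size([1, 507, 85])
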
def write_results(prediction, confidence, num_classes, nms_conf=0.4):
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction*conf_mask
    # Convert (centre x, centre y, width, height) to corner coordinates
    box_corner = prediction.new(prediction.shape)
    box_corner[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2]/2)
    box_corner[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3]/2)
    box_corner[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2]/2)
    box_corner[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3]/2)
    prediction[:, :, :4] = box_corner[:, :, :4]
    batch_size = prediction.size(0)
    write = False
    output = None
    for ind in range(batch_size):
        image_pred = prediction[ind]
        max_conf, max_conf_score = torch.max(image_pred[:, 5:5 + num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_score = max_conf_score.float().unsqueeze(1)
        seq = (image_pred[:, :5], max_conf, max_conf_score)
        image_pred = torch.cat(seq, 1)
        non_zero_ind = torch.nonzero(image_pred[:, 4])
        try:
            image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)
        except IndexError:
            continue
        if image_pred_.shape[0] == 0:
            continue
        # Get the various classes detected in the image
        img_classes = unique(image_pred_[:, -1])  # -1 index holds the class index
        for cls in img_classes:
            # Perform NMS: get the detections with one particular class
            cls_mask = image_pred_*(image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)
            # Sort the detections such that the entry with the maximum
            # objectness confidence is at the top
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)  # Number of detections
            for i in range(idx):
                # Get the IoUs of all boxes that come after the one we are
                # looking at in the loop
                try:
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:])
                except (ValueError, IndexError):
                    break
                # Zero out all the detections that have IoU > threshold
                iou_mask = (ious < nms_conf).float().unsqueeze(1)
                image_pred_class[i+1:] *= iou_mask
                # Remove the zeroed-out entries
                non_zero_ind = torch.nonzero(image_pred_class[:, 4]).squeeze()
                image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)
            # Repeat the batch index for as many detections of the class cls
            # as there are in the image
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            seq = batch_ind, image_pred_class
            if not write:
                output = torch.cat(seq, 1)
                write = True
            else:
                out = torch.cat(seq, 1)
                output = torch.cat((output, out))
    # Return 0 when no detection survived the confidence/NMS filtering
    if write:
        return output
    return 0

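# Shape of the result (illustrative note, not part of the original gist):
# write_results returns a (D, 8) tensor with one row per surviving detection,
# (image index in batch, x1, y1, x2, y2, objectness, max class score,
# class index), or the int 0 when nothing passes the threshold:
#
#     dets = write_results(prediction, confidence=0.5, num_classes=80)
#     if not isinstance(dets, int):
#         for d in dets:
#             print(int(d[0]), d[1:5].tolist(), int(d[-1]))
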
def letterbox_image(img, inp_dim):
    '''resize image with unchanged aspect ratio using padding'''
    img_w, img_h = img.shape[1], img.shape[0]
    width, height = inp_dim
    new_w = int(img_w * min(width/img_w, height/img_h))
    new_h = int(img_h * min(width/img_w, height/img_h))
    resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
    canvas = np.full((inp_dim[1], inp_dim[0], 3), 128)
    canvas[(height-new_h)//2:(height-new_h)//2 + new_h,
           (width-new_w)//2:(width-new_w)//2 + new_w, :] = resized_image
    return canvas

def prep_image(img, inp_dim):
    """
    Prepare image for inputting to the neural network.
    Returns a (1, 3, inp_dim, inp_dim) float tensor.
    """
    img = letterbox_image(img, (inp_dim, inp_dim))
    img = img[:, :, ::-1].transpose((2, 0, 1)).copy()  # BGR -> RGB, HWC -> CHW
    img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)
    return img

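# Typical use of the two helpers above (a sketch; "dog-cycle-car.png" is the
# sample image referenced in the second file of this gist, and any BGR image
# loaded with cv2 works):
#
#     frame = cv2.imread("dog-cycle-car.png")   # BGR uint8, HxWx3
#     batch = prep_image(frame, 416)            # float tensor, (1, 3, 416, 416)
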
def load_classes(namesfile):
    with open(namesfile, "r") as fileptr:
        names = fileptr.read().split("\n")[:-1]
    return names

# ---- Second gist file: the Darknet model itself. The utilities above live in
# ---- util.py, which this file imports.
#pylint: disable=missing-module-docstring, missing-function-docstring, missing-class-docstring, too-many-locals, too-many-statements, too-many-branches
from __future__ import division
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import cv2
import onnxruntime as rt
from util import predict_transform
# from torchsummary import summary
# from winmltools import convert_coreml

def get_test_input():
    img = cv2.imread("dog-cycle-car.png")
    img = cv2.resize(img, (224, 224))
    img_ = img[:, :, ::-1].transpose((2, 0, 1))  # BGR -> RGB, HWC -> CHW
    img_ = img_[np.newaxis, :, :, :] / 255.0
    img_ = torch.from_numpy(img_).float()
    img_ = Variable(img_)
    return img_

def parse_cfg(cfgfile):
    with open(cfgfile, "r") as file:
        lines = file.read().split("\n")
    lines = [l for l in lines if len(l) > 0]
    lines = [l for l in lines if l[0] != "#"]
    lines = [l.rstrip().lstrip() for l in lines]
    block = {}
    blocks = []
    for line in lines:
        if line[0] == "[":
            if len(block) != 0:
                blocks.append(block)
                block = {}
            block["type"] = line[1:-1].rstrip()
        else:
            key, value = line.split("=")
            block[key.rstrip()] = value.lstrip()
    blocks.append(block)
    return blocks

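# Illustrative round trip for parse_cfg (the cfg snippet is made up, but
# follows the Darknet format). A section such as
#
#     [convolutional]
#     batch_normalize=1
#     filters=32
#     size=3
#     stride=1
#     pad=1
#     activation=leaky
#
# becomes {'type': 'convolutional', 'batch_normalize': '1', 'filters': '32',
# 'size': '3', 'stride': '1', 'pad': '1', 'activation': 'leaky'}. All values
# stay strings; create_module below casts them with int() as needed.
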
def create_module(blocks):
    net_info = blocks[0]
    module_list = nn.ModuleList()
    prev_filters = 3
    output_filters = []
    for idx, blk in enumerate(blocks[1:]):
        module = nn.Sequential()
        if blk["type"] == "convolutional":
            activation = blk["activation"]
            try:
                batch_normalize = int(blk["batch_normalize"])
                bias = False
            except KeyError:
                batch_normalize = 0
                bias = True
            filters = int(blk["filters"])
            padding = int(blk["pad"])
            kernel_size = int(blk["size"])
            stride = int(blk["stride"])
            if padding:
                pad = (kernel_size - 1) // 2
            else:
                pad = 0
            conv = nn.Conv2d(prev_filters, filters, kernel_size,
                             stride=stride, padding=pad, bias=bias)
            module.add_module("conv_{}".format(idx), conv)
            if batch_normalize:
                batch_norm = nn.BatchNorm2d(filters)
                module.add_module("batch_norm_{}".format(idx), batch_norm)
            if activation == "leaky":
                activation_ = nn.LeakyReLU(0.1, True)
                module.add_module("Leaky_{}".format(idx), activation_)
        elif blk["type"] == "upsample":
            upsample = nn.Upsample(scale_factor=2, mode="nearest")
            module.add_module("upsample_{}".format(idx), upsample)
        elif blk["type"] == "route":
            blk["layers"] = blk["layers"].split(",")
            start = int(blk["layers"][0])
            try:
                end = int(blk["layers"][1])
            except IndexError:
                end = 0
            if start > 0:
                start = start - idx
            if end > 0:
                end = end - idx
            route = EmptyLayer()
            module.add_module("route_{}".format(idx), route)
            if end < 0:
                filters = output_filters[idx + start] + output_filters[idx + end]
            else:
                filters = output_filters[idx + start]
        elif blk["type"] == "shortcut":
            _ = int(blk["from"])
            shortcut = EmptyLayer()
            module.add_module("shortcut_{}".format(idx), shortcut)
        elif blk["type"] == "maxpool":
            stride = int(blk["stride"])
            size = int(blk["size"])
            if size != 1:
                maxpool = nn.MaxPool2d(size, stride)
            else:
                maxpool = MaxPoolStride1(size)
            module.add_module("maxpool_{}".format(idx), maxpool)
        elif blk["type"] == "yolo":
            mask = blk["mask"].split(",")
            mask = [int(val) for val in mask]
            anchors = blk["anchors"].split(",")
            anchors = [int(a) for a in anchors]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in mask]
            detection = DetectionLayer(anchors)
            module.add_module("Detection_{}".format(idx), detection)
        else:
            pass
        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)
    return net_info, module_list

class EmptyLayer(nn.Module):
    pass

class DetectionLayer(nn.Module):
    def __init__(self, anchors):
        super(DetectionLayer, self).__init__()
        self.anchors = anchors

    def forward(self, x, inp_dim, num_classes, cuda):  #pylint: disable=arguments-differ
        # The last argument feeds predict_transform's cuda flag
        prediction = x.data
        prediction = predict_transform(prediction, inp_dim, self.anchors, num_classes, cuda)
        return prediction

class MaxPoolStride1(nn.Module):
    def __init__(self, kernel_size):
        super(MaxPoolStride1, self).__init__()
        self.kernel_size = kernel_size
        self.pad = kernel_size - 1

    def forward(self, x):  #pylint: disable=arguments-differ
        # Replicate-pad so a stride-1 pool keeps the spatial size unchanged
        padded_x = F.pad(x, (0, self.pad, 0, self.pad), mode="replicate")
        pooled_x = nn.MaxPool2d(self.kernel_size, stride=1)(padded_x)
        return pooled_x

class DarkNet(nn.Module):
    def __init__(self, cfgfile, weights_file=None):
        super(DarkNet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = create_module(self.blocks)
        if weights_file:
            self.load_weights(weights_file)

    def get_blocks(self):
        return self.blocks

    def get_module_list(self):
        return self.module_list

    def forward(self, x, CUDA=False):  # pylint: disable=arguments-differ
        detections = []
        modules = self.blocks[1:]
        outputs = {}
        write = 0
        for i, module in enumerate(modules):
            module_type = module["type"]
            if module_type in ("convolutional", "upsample", "maxpool"):
                x = self.module_list[i](x)
                outputs[i] = x
            elif module_type == "route":
                layers = module["layers"]
                layers = [int(a) for a in layers]
                if layers[0] > 0:
                    layers[0] = layers[0] - i
                if len(layers) == 1:
                    x = outputs[i + layers[0]]
                else:
                    if layers[1] > 0:
                        layers[1] = layers[1] - i
                    map1 = outputs[i + layers[0]]
                    map2 = outputs[i + layers[1]]
                    x = torch.cat((map1, map2), 1)
                outputs[i] = x
            elif module_type == "shortcut":
                from_ = int(module["from"])
                x = outputs[i - 1] + outputs[i + from_]
                outputs[i] = x
            elif module_type == 'yolo':
                anchors = self.module_list[i][0].anchors
                # Get the input dimensions
                inp_dim = int(self.net_info["height"])
                # Get the number of classes
                num_classes = int(module["classes"])
                # Transform the raw feature map into detections
                x = predict_transform(x, inp_dim, anchors, num_classes, CUDA)
                if not write:
                    detections = x
                    write = 1
                else:
                    detections = torch.cat((detections, x), 1)
                outputs[i] = x
        return detections

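    # With the stock yolov3.cfg and a 416x416 input, the three YOLO layers
    # contribute 13*13*3 + 26*26*3 + 52*52*3 = 10647 rows, so forward returns
    # detections of shape (N, 10647, 85). Note that the cfg "height" must
    # match the actual input size, since predict_transform derives its
    # stride from it.
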
    def load_weights(self, weightfile):
        with open(weightfile, "rb") as fileptr:
            header = np.fromfile(fileptr, dtype=np.int32, count=5)
            self.header = torch.from_numpy(header)
            self.seen = self.header[3]
            weights = np.fromfile(fileptr, dtype=np.float32)
        ptr = 0
        for i in range(len(self.module_list)):
            module_type = self.blocks[i + 1]["type"]
            if module_type == "convolutional":
                model = self.module_list[i]
                try:
                    batch_normalize = int(self.blocks[i + 1]["batch_normalize"])
                except KeyError:
                    batch_normalize = 0
                conv = model[0]
                if batch_normalize:
                    batch_norm = model[1]
                    num_bn_biases = batch_norm.bias.numel()
                    bn_biases = torch.from_numpy(weights[ptr:ptr + num_bn_biases])
                    ptr += num_bn_biases
                    bn_weights = torch.from_numpy(weights[ptr:ptr + num_bn_biases])
                    ptr += num_bn_biases
                    bn_running_mean = torch.from_numpy(weights[ptr:ptr + num_bn_biases])
                    ptr += num_bn_biases
                    bn_running_var = torch.from_numpy(weights[ptr:ptr + num_bn_biases])
                    ptr += num_bn_biases
                    bn_biases = bn_biases.view_as(batch_norm.bias.data)
                    bn_weights = bn_weights.view_as(batch_norm.weight.data)
                    bn_running_mean = bn_running_mean.view_as(batch_norm.running_mean)
                    bn_running_var = bn_running_var.view_as(batch_norm.running_var)
                    batch_norm.bias.data.copy_(bn_biases)
                    batch_norm.weight.data.copy_(bn_weights)
                    batch_norm.running_mean.copy_(bn_running_mean)
                    batch_norm.running_var.copy_(bn_running_var)
                else:
                    num_biases = conv.bias.numel()
                    conv_biases = torch.from_numpy(weights[ptr:ptr + num_biases])
                    ptr += num_biases
                    conv_biases = conv_biases.view_as(conv.bias.data)
                    conv.bias.data.copy_(conv_biases)
                num_weights = conv.weight.numel()
                conv_weights = torch.from_numpy(weights[ptr:ptr + num_weights])
                ptr += num_weights
                conv_weights = conv_weights.view_as(conv.weight.data)
                conv.weight.data.copy_(conv_weights)

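# Layout of the .weights file consumed above: a 5-value int32 header (version
# info plus the "images seen" counter read into self.seen), followed by a flat
# run of float32 values. For every convolutional block the order is BN biases,
# BN weights, BN running mean, BN running var (or the conv biases when batch
# normalization is absent), then the conv weights.
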
def main():
    #net_scripted = torch.jit.script(DarkNet("cfg/yolov3.cfg", "yolov3.weights"))
    #dummy_input = torch.ones((1, 3, 224, 224))
    #output = net_scripted(dummy_input)
    #torch.onnx.export(net_scripted,
    #                  (dummy_input),
    #                  'model.onnx',
    #                  verbose=True,
    #                  input_names=['input_data'],
    #                  example_outputs=output)
    net = DarkNet("cfg/yolov3.cfg")
    net.load_weights("yolov3.weights")
    for param in net.parameters():
        param.requires_grad = False
    in_val = get_test_input()
    output = net(in_val, False)
    torch.onnx.export(net,
                      torch.ones((1, 3, 224, 224)),
                      "model.onnx",
                      export_params=True,
                      verbose=True,
                      input_names=['input_data'],
                      example_outputs=output)
    sess = rt.InferenceSession("model.onnx")
    print(sess)
    #print(summary(net, (3, 224, 224)))
    #
    #from pytorch2keras import pytorch_to_keras
    # we should specify shape of the input tensor
    #k_model = pytorch_to_keras(net, torch.ones((1, 3, 224, 224)), [(3, 224, 224,)], verbose=True)
    #print(k_model)

if __name__ == '__main__':
    main()