Created
January 3, 2020 21:42
-
-
Save bpstark/78e17ed98648ccbbf90e486903185724 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pylint: disable=missing-module-docstring, missing-function-docstring, missing-class-docstring, too-many-locals, too-many-statements | |
from __future__ import division | |
import torch | |
import numpy as np | |
import cv2 | |
def unique(tensor):
    """Return the sorted distinct values of ``tensor`` as a 1-D tensor.

    The result keeps the dtype and device of the input.
    """
    # torch.unique flattens and sorts by default (like np.unique) and avoids
    # the device -> host -> device copy that the numpy round trip required.
    return torch.unique(tensor)
def bbox_iou(box1, box2):
    """
    Returns the IoU of two sets of bounding boxes.

    Both arguments are 2-D tensors whose first four columns are
    (x1, y1, x2, y2) corners; the usual broadcasting rules apply between the
    rows of ``box1`` and ``box2``. Widths and heights use the inclusive
    "+ 1" pixel convention.
    """
    # Corners of the overlap rectangle.
    left = torch.max(box1[:, 0], box2[:, 0])
    top = torch.max(box1[:, 1], box2[:, 1])
    right = torch.min(box1[:, 2], box2[:, 2])
    bottom = torch.min(box1[:, 3], box2[:, 3])

    # Clamp so that disjoint boxes contribute zero overlap.
    overlap_w = torch.clamp(right - left + 1, min=0)
    overlap_h = torch.clamp(bottom - top + 1, min=0)
    inter_area = overlap_w * overlap_h

    area1 = (box1[:, 2] - box1[:, 0] + 1) * (box1[:, 3] - box1[:, 1] + 1)
    area2 = (box2[:, 2] - box2[:, 0] + 1) * (box2[:, 3] - box2[:, 1] + 1)

    return inter_area / (area1 + area2 - inter_area)
#@torch.jit.script | |
def predict_transform(prediction, inp_dim, anchors, num_classes, cuda=True):
    """Decode a raw YOLO detection feature map into box rows.

    Args:
        prediction: tensor of shape
            (batch, num_anchors * (5 + num_classes), grid, grid).
        inp_dim: side length of the (square) network input in pixels.
        anchors: sequence of (width, height) anchor pairs in input pixels.
        num_classes: number of class scores per anchor.
        cuda: retained for backward compatibility only. Helper tensors are
            created on ``prediction.device`` so they always match the input.

    Returns:
        Tensor of shape (batch, grid * grid * num_anchors, 5 + num_classes)
        whose columns are (centre_x, centre_y, width, height, objectness,
        class scores...), with the box columns expressed in input pixels.
        Rows are ordered by grid cell (row-major), then by anchor.

    The result is built out of place (no slice assignment), which keeps the
    function friendly to tracing/export.
    """
    del cuda  # device is taken from `prediction`; see docstring
    batch_size = prediction.size(0)
    stride = inp_dim // prediction.size(2)
    grid_size = inp_dim // stride
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)

    # (B, A*attrs, G, G) -> (B, G*G*A, attrs)
    prediction = prediction.view(batch_size, bbox_attrs * num_anchors,
                                 grid_size * grid_size)
    prediction = prediction.transpose(1, 2).contiguous()
    prediction = prediction.view(batch_size,
                                 grid_size * grid_size * num_anchors,
                                 bbox_attrs)

    # Per-cell offsets. Cells are flattened row-major, so x (the column
    # index) varies fastest and y (the row index) varies slowest.
    grid = torch.arange(grid_size, dtype=prediction.dtype,
                        device=prediction.device)
    x_offset = grid.repeat(grid_size).view(-1, 1)
    y_offset = grid.view(-1, 1).repeat(1, grid_size).view(-1, 1)
    x_y_offset = torch.cat((x_offset, y_offset), 1) \
        .repeat(1, num_anchors).view(-1, 2).unsqueeze(0)

    # One anchor per row, cycling through the anchors inside every cell.
    anchor_tensor = torch.tensor([(float(a[0]), float(a[1])) for a in anchors],
                                 dtype=prediction.dtype,
                                 device=prediction.device)
    anchor_tensor = anchor_tensor.repeat(grid_size * grid_size, 1).unsqueeze(0)

    # Centre: sigmoid offset inside the cell plus the cell index, in pixels.
    box_xy = (torch.sigmoid(prediction[:, :, 0:2]) + x_y_offset) * stride
    # Size: log-space scale applied to the anchor. Anchors are already given
    # in pixels, so dividing by stride and multiplying back is unnecessary.
    box_wh = torch.exp(prediction[:, :, 2:4]) * anchor_tensor
    # Objectness (column 4) and class scores (columns 5+) are sigmoided.
    scores = torch.sigmoid(prediction[:, :, 4:])

    return torch.cat((box_xy, box_wh, scores), dim=2)
def write_results(prediction, confidence, num_classes, nms_conf=0.4):
    """Threshold detections by objectness and apply per-class NMS.

    Args:
        prediction: tensor of shape (batch, boxes, 5 + num_classes) with
            columns (centre_x, centre_y, width, height, objectness,
            class scores...).
        confidence: rows whose objectness is not strictly greater than this
            value are discarded.
        num_classes: number of class-score columns.
        nms_conf: IoU threshold; a box is suppressed when its IoU with a
            higher-objectness box of the same class is >= this value.

    Returns:
        A tensor with one row per kept detection and columns
        (batch index, x1, y1, x2, y2, objectness, best class score,
        best class index), or the integer ``0`` when nothing survives.
    """
    # Zero every row that fails the objectness threshold; this produces a new
    # tensor, so the caller's `prediction` is left untouched.
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # (cx, cy, w, h) -> (x1, y1, x2, y2)
    half_wh = prediction[:, :, 2:4] / 2
    corners = torch.cat((prediction[:, :, 0:2] - half_wh,
                         prediction[:, :, 0:2] + half_wh), 2)
    prediction[:, :, :4] = corners

    kept = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Collapse the class scores to (best score, best class index).
        max_conf, max_conf_idx = torch.max(image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat((image_pred[:, :5],
                                max_conf.float().unsqueeze(1),
                                max_conf_idx.float().unsqueeze(1)), 1)

        # Drop the rows zeroed by the objectness threshold.
        image_pred_ = image_pred[image_pred[:, 4] != 0]
        if image_pred_.shape[0] == 0:
            continue

        # The last column holds the class index.
        for cls in unique(image_pred_[:, -1]):
            # Rows of this class whose class score is non-zero.
            cls_rows = (image_pred_[:, -1] == cls) & (image_pred_[:, -2] != 0)
            image_pred_class = image_pred_[cls_rows]

            # Highest objectness first.
            order = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[order]

            # Greedy NMS: box i suppresses every later box that overlaps it
            # too much. The tensor shrinks as boxes are removed, hence the
            # explicit bound instead of a fixed range().
            i = 0
            while i < image_pred_class.size(0) - 1:
                ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                image_pred_class[i + 1:])
                survivors = image_pred_class[i + 1:][ious < nms_conf]
                image_pred_class = torch.cat((image_pred_class[:i + 1],
                                              survivors))
                i += 1

            # Prefix every kept row with the index of its image in the batch.
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            kept.append(torch.cat((batch_ind, image_pred_class), 1))

    if not kept:
        # Callers distinguish "no detections" by the integer return value.
        return 0
    return torch.cat(kept)
def letterbox_image(img, inp_dim):
    '''Resize ``img`` to fit inside ``inp_dim`` = (width, height) without
    changing its aspect ratio, centring it on a canvas filled with 128.'''
    target_w, target_h = inp_dim
    src_h, src_w = img.shape[0], img.shape[1]

    # A single scale factor for both axes keeps the aspect ratio.
    scale = min(target_w / src_w, target_h / src_h)
    new_w = int(src_w * scale)
    new_h = int(src_h * scale)
    resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

    canvas = np.full((inp_dim[1], inp_dim[0], 3), 128)

    # Paste the resized image in the middle of the canvas.
    top = (target_h - new_h) // 2
    left = (target_w - new_w) // 2
    canvas[top:top + new_h, left:left + new_w, :] = resized_image
    return canvas
def prep_image(img, inp_dim):
    """
    Prepare image for inputting to the neural network.

    The image is letterboxed to ``inp_dim`` x ``inp_dim``, its channel axis
    is reversed (presumably BGR -> RGB for an OpenCV image) and moved to the
    front, and the values are divided by 255.
    Returns a float tensor with a leading batch dimension of 1.
    """
    boxed = letterbox_image(img, (inp_dim, inp_dim))
    # .copy() makes the negatively-strided view contiguous for from_numpy.
    channels_first = boxed[:, :, ::-1].transpose((2, 0, 1)).copy()
    tensor = torch.from_numpy(channels_first).float()
    return tensor.div(255.0).unsqueeze(0)
def load_classes(namesfile):
    """Read class names from ``namesfile``, one name per line.

    Returns the list of lines in file order. A single trailing newline does
    not produce an extra empty entry, and the last name is kept even when
    the file does not end with a newline.
    """
    with open(namesfile, "r") as fileptr:
        names = fileptr.read().split("\n")
    # Only the artefact of a terminating newline is removed; interior blank
    # lines are preserved so that list positions still match class indices.
    if names and names[-1] == "":
        names.pop()
    return names
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pylint: disable=missing-module-docstring, missing-function-docstring, missing-class-docstring, too-many-locals, too-many-statements, too-many-branches | |
from __future__ import division | |
import torch | |
import torch.nn as nn | |
from torch.autograd import Variable | |
import torch.functional as F | |
import numpy as np | |
import cv2 | |
import onnxruntime as rt | |
from util import predict_transform | |
# from torchsummary import summary | |
# from winmltools import convert_coreml | |
def get_test_input():
    """Load ``dog-cycle-car.png`` as a network input.

    The image is resized to 224x224, its channel axis is reversed and moved
    to the front, and the values are scaled by 1/255.

    Returns:
        A float tensor of shape (1, 3, 224, 224), wrapped in ``Variable``.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    image_path = "dog-cycle-car.png"
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside cv2.resize.
        raise FileNotFoundError(
            "could not read test image '{}'".format(image_path))
    img = cv2.resize(img, (224, 224))
    img_ = img[:, :, ::-1].transpose((2, 0, 1))
    # The division allocates a fresh contiguous array, so from_numpy is safe
    # despite the negatively-strided view above.
    img_ = img_[np.newaxis, :, :, :] / 255.0
    img_ = torch.from_numpy(img_).float()
    return Variable(img_)
def parse_cfg(cfgfile):
    """Parse a Darknet-style configuration file.

    Each ``[section]`` header starts a new block; following ``key=value``
    lines become string entries of that block, and the section name is
    stored under the key ``"type"``. Blank lines and lines whose first
    non-blank character is ``#`` are ignored.

    Returns:
        A list of dicts, one per section, in file order.

    Raises:
        ValueError: if a non-header line contains no ``=``.
    """
    with open(cfgfile, "r") as cfg:
        raw_lines = cfg.read().split("\n")

    block = {}
    blocks = []
    for raw in raw_lines:
        # Strip before filtering so indented comments and whitespace-only
        # lines are skipped instead of crashing the parser.
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("["):
            if block:
                blocks.append(block)
                block = {}
            block["type"] = line[1:-1].rstrip()
        else:
            # Split on the first "=" only, so values may contain "=".
            key, sep, value = line.partition("=")
            if not sep:
                raise ValueError(
                    "malformed line in {}: {!r}".format(cfgfile, line))
            block[key.rstrip()] = value.lstrip()
    blocks.append(block)
    return blocks
def create_module(blocks):
    """Build the PyTorch modules described by parsed cfg blocks.

    Args:
        blocks: list of dicts as returned by ``parse_cfg``; ``blocks[0]`` is
            the network-level section, every following dict is one layer.

    Returns:
        ``(net_info, module_list)`` where ``net_info`` is ``blocks[0]`` and
        ``module_list`` is an ``nn.ModuleList`` with one ``nn.Sequential``
        per layer block (empty for unknown block types).

    Side effect:
        For ``route`` blocks, ``blk["layers"]`` is replaced by the list of
        its comma-separated parts.
    """
    net_info = blocks[0]
    module_list = nn.ModuleList()
    prev_filters = 3
    # Defined up front so a network that does not start with a convolution
    # carries the input channel count forward instead of hitting an unbound
    # local.
    filters = prev_filters
    output_filters = []

    for idx, blk in enumerate(blocks[1:]):
        module = nn.Sequential()
        blk_type = blk["type"]

        if blk_type == "convolutional":
            activation = blk["activation"]
            batch_normalize = int(blk.get("batch_normalize", 0))
            # The convolution has a bias exactly when no batch norm follows;
            # this also covers an explicit "batch_normalize=0".
            bias = not batch_normalize
            filters = int(blk["filters"])
            padding = int(blk["pad"])
            kernel_size = int(blk["size"])
            stride = int(blk["stride"])
            pad = (kernel_size - 1) // 2 if padding else 0

            conv = nn.Conv2d(prev_filters, filters, kernel_size,
                             stride=stride, padding=pad, bias=bias)
            module.add_module("conv_{}".format(idx), conv)
            if batch_normalize:
                batch_norm = nn.BatchNorm2d(filters)
                module.add_module("batch_norm_{}".format(idx), batch_norm)
            if activation == "leaky":
                activation_ = nn.LeakyReLU(0.1, True)
                module.add_module("Leaky_{}".format(idx), activation_)

        elif blk_type == "upsample":
            upsample = nn.Upsample(scale_factor=2, mode="nearest")
            module.add_module("upsample_{}".format(idx), upsample)

        elif blk_type == "route":
            layers = blk["layers"]
            if isinstance(layers, str):
                layers = layers.split(",")
            # The split list is stored back on the block for later readers.
            blk["layers"] = layers
            start = int(layers[0])
            end = int(layers[1]) if len(layers) > 1 else 0
            # Positive values are absolute layer indices; make them relative.
            if start > 0:
                start = start - idx
            if end > 0:
                end = end - idx
            route = EmptyLayer()
            module.add_module("route_{0}".format(idx), route)
            if end < 0:
                # Two sources are concatenated along the channel axis.
                filters = output_filters[idx + start] + output_filters[idx + end]
            else:
                filters = output_filters[idx + start]

        elif blk_type == "shortcut":
            int(blk["from"])  # validate only; the value is used at run time
            shortcut = EmptyLayer()
            module.add_module("shortcut_{}".format(idx), shortcut)

        elif blk_type == "maxpool":
            stride = int(blk["stride"])
            size = int(blk["size"])
            # MaxPoolStride1 is the size-preserving stride-1 pool, so the
            # choice must depend on the stride, not on the kernel size.
            if stride != 1:
                maxpool = nn.MaxPool2d(size, stride)
            else:
                maxpool = MaxPoolStride1(size)
            module.add_module("maxpool_{}".format(idx), maxpool)

        elif blk_type == "yolo":
            mask = [int(val) for val in blk["mask"].split(",")]
            flat = [int(a) for a in blk["anchors"].split(",")]
            anchors = [(flat[i], flat[i + 1]) for i in range(0, len(flat), 2)]
            anchors = [anchors[i] for i in mask]
            detection = DetectionLayer(anchors)
            module.add_module("Detection_{}".format(idx), detection)

        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)

    return net_info, module_list
class EmptyLayer(nn.Module):
    """Placeholder module with no parameters and no behaviour of its own.

    It only occupies a slot in the module list for block types whose work is
    done by the network's forward pass.
    """
class DetectionLayer(nn.Module):
    """Detection head that stores its anchors and decodes a feature map."""

    def __init__(self, anchors):
        """Store ``anchors``, the (width, height) pairs used by this head."""
        super(DetectionLayer, self).__init__()
        self.anchors = anchors

    def forward(self, x, inp_dim, num_classes, confidence):  # pylint: disable=arguments-differ
        """Decode ``x`` with ``predict_transform``.

        ``confidence`` is accepted for interface compatibility but is not
        used here. It must not be forwarded positionally, because the fifth
        parameter of ``predict_transform`` is the CUDA flag, not a threshold.
        """
        prediction = x.data
        return predict_transform(prediction, inp_dim, self.anchors,
                                 num_classes, prediction.is_cuda)
class MaxPoolStride1(nn.Module):
    """Max pooling with stride 1 that preserves the spatial size.

    The input is padded on the right and bottom by ``kernel_size - 1``
    (replicating the edge values) before pooling, so the output has the same
    height and width as the input.
    """

    def __init__(self, kernel_size):
        super(MaxPoolStride1, self).__init__()
        self.kernel_size = kernel_size
        self.pad = kernel_size - 1

    def forward(self, x):  # pylint: disable=arguments-differ
        padded_x = x
        if self.pad:
            # `pad` lives in torch.nn.functional; the file-level alias F
            # points at torch.functional, which does not provide it.
            padded_x = nn.functional.pad(x, (0, self.pad, 0, self.pad),
                                         mode="replicate")
        # The stride is always 1. Using self.pad as the stride only happened
        # to be right for kernel_size == 2 and gave 0 for kernel_size == 1.
        pooled_x = nn.MaxPool2d(self.kernel_size, stride=1)(padded_x)
        return pooled_x
class DarkNet(nn.Module):
    """Network assembled from a Darknet cfg file.

    ``blocks`` holds the parsed cfg sections (``blocks[0]`` is the network
    section) and ``module_list`` holds one module per layer block, so
    ``module_list[i]`` corresponds to ``blocks[i + 1]``.
    """

    def __init__(self, cfgfile, weights_file=None):
        """Parse ``cfgfile``, build the modules and optionally load weights."""
        super(DarkNet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = create_module(self.blocks)
        if weights_file:
            self.load_weights(weights_file)

    def get_blocks(self):
        """Return the parsed cfg blocks."""
        return self.blocks

    def get_module_list(self):
        """Return the ``nn.ModuleList`` of layer modules."""
        return self.module_list

    def get_moduel_list(self):
        """Misspelled alias of ``get_module_list``, kept for existing callers."""
        return self.module_list

    def forward(self, x, CUDA=False):  # pylint: disable=arguments-differ
        """Run the network on ``x``.

        Args:
            x: input batch.
            CUDA: forwarded to ``predict_transform`` for yolo blocks.

        Returns:
            The outputs of all yolo blocks concatenated along dimension 1,
            or an empty list if the network has no yolo block.
        """
        detections = []
        have_detections = False
        # Output of every executed layer, keyed by layer index, so route and
        # shortcut blocks can refer back to earlier layers.
        outputs = {}

        for i, module in enumerate(self.blocks[1:]):
            module_type = module["type"]

            if module_type in ("convolutional", "upsample", "maxpool"):
                x = self.module_list[i](x)

            elif module_type == "route":
                layers = module["layers"]
                if isinstance(layers, str):
                    # Tolerate blocks whose "layers" value was never split.
                    layers = layers.split(",")
                layers = [int(a) for a in layers]
                # Positive entries are absolute indices; make them relative.
                if layers[0] > 0:
                    layers[0] = layers[0] - i
                if len(layers) == 1:
                    x = outputs[i + layers[0]]
                else:
                    if layers[1] > 0:
                        layers[1] = layers[1] - i
                    map1 = outputs[i + layers[0]]
                    map2 = outputs[i + layers[1]]
                    x = torch.cat((map1, map2), 1)

            elif module_type == "shortcut":
                from_ = int(module["from"])
                x = outputs[i - 1] + outputs[i + from_]

            elif module_type == "yolo":
                anchors = self.module_list[i][0].anchors
                inp_dim = int(self.net_info["height"])
                num_classes = int(module["classes"])
                x = predict_transform(x, inp_dim, anchors, num_classes, CUDA)
                if have_detections:
                    detections = torch.cat((detections, x), 1)
                else:
                    detections = x
                    have_detections = True

            else:
                # Unknown block types produce no output entry.
                continue

            outputs[i] = x

        return detections

    @staticmethod
    def _take(weights, ptr, like):
        """Read ``like.numel()`` values from ``weights`` starting at ``ptr``.

        Returns the values as a tensor shaped like ``like`` together with
        the advanced read position.
        """
        count = like.numel()
        chunk = torch.from_numpy(weights[ptr:ptr + count]).view_as(like)
        return chunk, ptr + count

    def load_weights(self, weightfile):
        """Load convolution and batch-norm parameters from ``weightfile``.

        The file starts with five int32 header values (stored in
        ``self.header``; the fourth is also stored as ``self.seen``),
        followed by float32 values that are consumed in layer order.

        For a convolution with batch norm the order is: BN bias, BN weight,
        BN running mean, BN running variance, then the convolution weights.
        Without batch norm it is: convolution bias, then convolution weights.
        """
        # Read everything up front so the handle is closed even if a later
        # shape mismatch raises.
        with open(weightfile, "rb") as fileptr:
            header = np.fromfile(fileptr, dtype=np.int32, count=5)
            weights = np.fromfile(fileptr, dtype=np.float32)
        self.header = torch.from_numpy(header)
        self.seen = self.header[3]

        ptr = 0
        for i, model in enumerate(self.module_list):
            block = self.blocks[i + 1]
            if block["type"] != "convolutional":
                continue
            batch_normalize = int(block.get("batch_normalize", 0))
            conv = model[0]

            if batch_normalize:
                batch_norm = model[1]
                bn_biases, ptr = self._take(weights, ptr, batch_norm.bias.data)
                bn_weights, ptr = self._take(weights, ptr, batch_norm.weight.data)
                bn_running_mean, ptr = self._take(weights, ptr, batch_norm.running_mean)
                bn_running_var, ptr = self._take(weights, ptr, batch_norm.running_var)
                batch_norm.bias.data.copy_(bn_biases)
                batch_norm.weight.data.copy_(bn_weights)
                batch_norm.running_mean.copy_(bn_running_mean)
                batch_norm.running_var.copy_(bn_running_var)
            else:
                conv_biases, ptr = self._take(weights, ptr, conv.bias.data)
                conv.bias.data.copy_(conv_biases)

            conv_weights, ptr = self._take(weights, ptr, conv.weight.data)
            conv.weight.data.copy_(conv_weights)
def main():
    """Build the YOLOv3 network, export it to ``model.onnx`` and open the
    exported file with ONNX Runtime."""
    # Scripted-export variant, kept for reference:
    #net_scripted = torch.jit.script(DarkNet("cfg/yolov3.cfg", "yolov3.weights"))
    #dummy_input = torch.ones((1, 3, 224, 224))
    #output = net_scripted(dummy_input)
    #torch.onnx.export(net_scripted,
    #                  (dummy_input),
    #                  'model.onnx',
    #                  verbose=True,
    #                  input_names=['input_data'],
    #                  example_outputs=output)
    model = DarkNet("cfg/yolov3.cfg")
    model.load_weights("yolov3.weights")

    # Freeze every parameter before tracing.
    for parameter in model.parameters():
        parameter.requires_grad = False

    # One real forward pass supplies the example outputs for the exporter.
    sample = get_test_input()
    sample_output = model(sample, False)

    dummy_input = torch.ones((1, 3, 224, 224))
    torch.onnx.export(model,
                      dummy_input,
                      "model.onnx",
                      export_params=True,
                      verbose=True,
                      input_names=['input_data'],
                      example_outputs=sample_output)

    session = rt.InferenceSession("model.onnx")
    print(session)
    #print(summary(net, (3, 224, 224)))
    #
    #from pytorch2keras import pytorch_to_keras
    # we should specify shape of the input tensor
    #k_model = pytorch_to_keras(net, torch.ones((1, 3, 224, 224)), [(3, 224, 224,)], verbose=True)
    #print(k_model)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment