import math
import torch
import torch.nn as nn
import numpy as np
from torch.utils.checkpoint import checkpoint


def convert_x1y1x2y2_to_xywh(box):
    """
    Convert boxes from [x1, y1, x2, y2] corner format to [cx, cy, w, h] center format.
    :param box: (N, 4) tensor or numpy array.
    :return: (N, 4) tensor or numpy array of the same type as the input.
    """
    cx = (box[:, 0] + box[:, 2]) / 2.
    cy = (box[:, 1] + box[:, 3]) / 2.
    cw = box[:, 2] - box[:, 0] + 1
    ch = box[:, 3] - box[:, 1] + 1
    if torch.is_tensor(box):
        return torch.cat((cx.view(-1, 1), cy.view(-1, 1), cw.view(-1, 1), ch.view(-1, 1)), 1)
    else:
        return np.concatenate((cx.reshape(-1, 1), cy.reshape(-1, 1), cw.reshape(-1, 1), ch.reshape(-1, 1)), 1)
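
# A minimal usage sketch (illustrative, not part of the original gist): a numpy
# box [0, 0, 9, 19] maps to center (4.5, 9.5) with width 10 and height 20
# (the +1 treats the corner coordinates as inclusive pixels):
#   convert_x1y1x2y2_to_xywh(np.array([[0., 0., 9., 19.]]))
#   # -> array([[ 4.5,  9.5, 10. , 20. ]])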


class RegionLoss(nn.Module):
    def __init__(self, anchors, n_classes, coord_scale=1, reduction=32, noobject_scale=1,
                 object_scale=5, class_scale=1, thresh=0.6, coord_prefill=12800):
        super(RegionLoss, self).__init__()
        self.anchors = torch.from_numpy(anchors).float()
        self.coord_scale = coord_scale
        self.reduction = reduction  # network stride: input pixels per output grid cell
        self.noobject_scale = noobject_scale
        self.object_scale = object_scale
        self.class_scale = class_scale
        self.thresh = thresh
        self.coord_prefill = coord_prefill
        self.n_classes = n_classes
        self.register_buffer('seen', torch.tensor(0))

    def forward(self, output, target, seen=None):
        anchors = self.anchors.clone()
        nH = output.size(2)
        nW = output.size(3)
        if anchors.max() <= 1:
            # Anchors given as fractions of the input size: scale width by the grid
            # width and height by the grid height.
            anchors[:, 0] *= nW
            anchors[:, 1] *= nH
        nB = output.size(0)
        nA = len(anchors)
        nC = self.n_classes
        nPixels = nH * nW
        if seen is not None:
            self.seen = torch.tensor(seen)
        elif self.training:
            self.seen += nB

        output = output.view(nB, nA, -1, nPixels)
        coord = torch.zeros_like(output[:, :, :4])
        coord[:, :, :2] = output[:, :, :2].sigmoid()  # tx, ty
        coord[:, :, 2:4] = output[:, :, 2:4]          # tw, th
        conf = output[:, :, 4].sigmoid()
        if nC > 1:
            cls = output[:, :, 5:].contiguous().view(nB * nA, nC, nPixels).transpose(1, 2).contiguous().view(-1, nC)

        # Create prediction boxes
        pred_boxes = torch.FloatTensor(nB * nA * nPixels, 4)
        lin_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).view(nPixels)
        lin_y = torch.linspace(0, nH - 1, nH).view(nH, 1).repeat(1, nW).view(nPixels)
        anchor_w = anchors[:, 0].contiguous().view(nA, 1)
        anchor_h = anchors[:, 1].contiguous().view(nA, 1)
        coord_ = coord.clone()
        if coord_.is_cuda:
            coord_ = coord_.cpu()
        pred_boxes[:, 0] = (coord_[:, :, 0].detach() + lin_x).view(-1)
        pred_boxes[:, 1] = (coord_[:, :, 1].detach() + lin_y).view(-1)
        pred_boxes[:, 2] = (coord_[:, :, 2].detach().exp() * anchor_w).view(-1)
        pred_boxes[:, 3] = (coord_[:, :, 3].detach().exp() * anchor_h).view(-1)
        pred_boxes = pred_boxes.cpu()

        coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = self.build_targets(pred_boxes, target, nH, nW, anchors)
        coord_mask = coord_mask.expand_as(tcoord).sqrt()
        conf_mask = conf_mask.sqrt()
        if nC > 1:
            tcls = tcls[cls_mask].view(-1).long()
            cls_mask = cls_mask.view(-1, 1).repeat(1, nC)
            # cls lives on the same device as the network output, cls_mask on the CPU.
            cls = cls[cls_mask.to(cls.device)].view(-1, nC)
        if coord.is_cuda:
            tcoord = tcoord.cuda()
            tconf = tconf.cuda()
            tcls = tcls.cuda()
            coord_mask = coord_mask.cuda()
            conf_mask = conf_mask.cuda()

        # Compute losses
        mse = nn.MSELoss(reduction='sum')
        self.loss_coord = self.coord_scale * mse(coord * coord_mask, tcoord * coord_mask) / nB
        self.loss_conf = mse(conf * conf_mask, tconf * conf_mask) / nB
        if nC > 1:
            self.loss_cls = self.class_scale * 2 * nn.CrossEntropyLoss(reduction='sum')(cls, tcls) / nB
            self.loss_tot = self.loss_coord + self.loss_conf + self.loss_cls
        else:
            self.loss_cls = None
            self.loss_tot = self.loss_coord + self.loss_conf

        loss_dict = {'coord': self.loss_coord.item(), 'conf': self.loss_conf.item()}
        if self.loss_cls is not None:
            loss_dict['cls'] = self.loss_cls.item()
        return self.loss_tot, loss_dict

    def build_targets(self, pred_boxes, target, nH, nW, anchors):
        nB = len(target)
        nA = len(anchors)
        nAnchors = nA * nH * nW
        nPixels = nH * nW
        conf_mask = torch.ones(nB, nA, nPixels, requires_grad=False) * self.noobject_scale
        coord_mask = torch.zeros(nB, nA, 1, nPixels, requires_grad=False)
        cls_mask = torch.zeros(nB, nA, nPixels, requires_grad=False).bool()
        tcoord = torch.zeros(nB, nA, 4, nPixels, requires_grad=False)
        tconf = torch.zeros(nB, nA, nPixels, requires_grad=False)
        tcls = torch.zeros(nB, nA, nPixels, requires_grad=False)
        if self.seen < self.coord_prefill:
            # Early in training, push every predicted box towards the center of its cell.
            coord_mask.fill_(1)
            tcoord[:, :, 0].fill_(0.5)
            tcoord[:, :, 1].fill_(0.5)
        for b in range(nB):
            # Keep only ground-truth rows with a valid (non-negative) class label.
            gt = target[b][target[b][:, -1] > -1]
            if gt.shape[0] == 0:
                continue
            gt = torch.from_numpy(gt).float()
            cur_pred_boxes = pred_boxes[b * nAnchors: (b + 1) * nAnchors]
            anchors_ = torch.cat([torch.zeros_like(anchors), anchors], 1)
            gt_box = gt[:, :4]
            gt_cls = gt[:, -1]
            gt_wh = gt_box / self.reduction
            gt_wh = convert_x1y1x2y2_to_xywh(gt_wh)
            # Ignore (conf_mask = 0) predictions that already overlap a ground truth well.
            iou_gt_pred = bbox_ious(gt_wh, cur_pred_boxes)
            mask = (iou_gt_pred > self.thresh).sum(0) >= 1
            conf_mask[b][mask.view_as(conf_mask[b])] = 0
            # Match each ground truth to the anchor with the best shape-only IOU.
            gt_wh_ = gt_wh.clone()
            gt_wh_[:, :2] = 0
            iou_gt_anchors = bbox_ious(gt_wh_, anchors_)
            _, best_anchors = iou_gt_anchors.max(1)
            gt_size = gt.size(0)
            for i in range(gt_size):
                gi = min(nW - 1, max(0, int(gt_wh[i, 0])))
                gj = min(nH - 1, max(0, int(gt_wh[i, 1])))
                best_n = best_anchors[i]
                iou = iou_gt_pred[i][best_n * nPixels + gj * nW + gi]
                coord_mask[b][best_n][0][gj * nW + gi] = 2 - (gt_wh[i, 2] * gt_wh[i, 3]) / nPixels
                cls_mask[b][best_n][gj * nW + gi] = 1
                conf_mask[b][best_n][gj * nW + gi] = self.object_scale
                tcoord[b][best_n][0][gj * nW + gi] = gt_wh[i, 0] - gi
                tcoord[b][best_n][1][gj * nW + gi] = gt_wh[i, 1] - gj
                tcoord[b][best_n][2][gj * nW + gi] = math.log(gt_wh[i, 2] / anchors[best_n, 0])
                tcoord[b][best_n][3][gj * nW + gi] = math.log(gt_wh[i, 3] / anchors[best_n, 1])
                tconf[b][best_n][gj * nW + gi] = iou
                tcls[b][best_n][gj * nW + gi] = gt_cls[i]
        return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls
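
# Note on the expected `target` format (inferred from build_targets and the demo
# below): a list of length nB with one (N, 5) numpy array per image, rows being
# [x1, y1, x2, y2, class] in input-image pixels; rows with a negative class are
# treated as padding and skipped.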


def bbox_ious(boxes1, boxes2):
    """ Compute IOU between all boxes from ``boxes1`` with all boxes from ``boxes2``.
    Args:
        boxes1 (torch.Tensor): List of bounding boxes
        boxes2 (torch.Tensor): List of bounding boxes
    Note:
        List format: [[xc, yc, w, h],...]
    """
    b1x1, b1y1 = (boxes1[:, :2] - (boxes1[:, 2:4] / 2)).split(1, 1)
    b1x2, b1y2 = (boxes1[:, :2] + (boxes1[:, 2:4] / 2)).split(1, 1)
    b2x1, b2y1 = (boxes2[:, :2] - (boxes2[:, 2:4] / 2)).split(1, 1)
    b2x2, b2y2 = (boxes2[:, :2] + (boxes2[:, 2:4] / 2)).split(1, 1)
    dx = (b1x2.min(b2x2.t()) - b1x1.max(b2x1.t())).clamp(min=0)
    dy = (b1y2.min(b2y2.t()) - b1y1.max(b2y1.t())).clamp(min=0)
    intersections = dx * dy
    areas1 = (b1x2 - b1x1) * (b1y2 - b1y1)
    areas2 = (b2x2 - b2x1) * (b2y2 - b2y1)
    unions = (areas1 + areas2.t()) - intersections
    return intersections / unions
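
# A small sanity-check sketch (illustrative only): identical center-format boxes
# give IOU 1, disjoint boxes give 0.
#   a = torch.tensor([[5., 5., 10., 10.]])                      # center (5, 5), 10x10
#   b = torch.tensor([[5., 5., 10., 10.], [50., 50., 2., 2.]])
#   bbox_ious(a, b)
#   # -> tensor([[1., 0.]])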


class Conv_1x1(nn.Module):
    def __init__(self, in_planes, out_planes):
        super(Conv_1x1, self).__init__()
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1)
        self.bn = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class MiniModel(nn.Module):
    def __init__(self):
        super(MiniModel, self).__init__()
        dim = 256
        self.up5 = nn.Upsample(scale_factor=8, mode='nearest')
        self.up4 = nn.Upsample(scale_factor=8, mode='nearest')
        self.up3 = nn.Upsample(scale_factor=4, mode='nearest')
        self.up2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.layer1_1 = Conv_1x1(64, dim)
        self.layer2_1 = Conv_1x1(256, dim)
        self.layer3_1 = Conv_1x1(512, dim)
        self.layer4_1 = Conv_1x1(1024, dim)
        self.layer5_1 = Conv_1x1(1024, dim)
        self.out1 = nn.Conv2d(1280, 35, kernel_size=1)

    def forward(self, layer1, layer2, layer3, layer4, layer5):
        n1 = checkpoint(self.layer1_1, layer1)
        n2 = checkpoint(self.layer2_1, layer2)
        n3 = checkpoint(self.layer3_1, layer3)
        n4 = checkpoint(self.layer4_1, layer4)
        n5 = checkpoint(self.layer5_1, layer5)
        layer2 = self.up2(n2)
        layer3 = self.up3(n3)
        layer4 = self.up4(n4)
        layer5 = self.up5(n5)
        x = torch.cat([n1, layer2, layer3, layer4, layer5], 1)
        out1 = self.out1(x)
        return out1
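
# Shape check (assuming the 208x208 inputs used below): every branch is upsampled
# to 208x208, the concatenation has 5 * 256 = 1280 channels, and out1 produces
# 35 = nA * (4 coords + 1 objectness + n_classes) = 5 * (5 + 2) channels, i.e. the
# model returns a (nB, 35, 208, 208) map that RegionLoss can consume directly.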


layer1 = torch.randn(2, 64, 208, 208, device='cuda', requires_grad=True)
layer2 = torch.randn(2, 256, 104, 104, device='cuda', requires_grad=True)
layer3 = torch.randn(2, 512, 52, 52, device='cuda', requires_grad=True)
layer4 = torch.randn(2, 1024, 26, 26, device='cuda', requires_grad=True)
layer5 = torch.randn(2, 1024, 26, 26, device='cuda', requires_grad=True)
labels = [np.array([[8., 159., 20., 178., 1.],
                    [354., 275., 373., 292., 1.],
                    [324., 4., 335., 20., 1.],
                    [286., 257., 307., 283., 1.],
                    [163., 11., 178., 38., 1.],
                    [221., 19., 234., 44., 1.]]),
          np.array([[355., 26., 359., 29., 1.],
                    [357., 345., 363., 353., 1.],
                    [291., 11., 295., 14., 1.],
                    [146., 261., 149., 263., 1.],
                    [356., 6., 368., 18., 1.],
                    [223., 220., 230., 224., 1.],
                    [314., 177., 317., 180., 1.],
                    [146., 263., 149., 266., 1.],
                    [14., 272., 22., 278., 1.],
                    [301., 343., 304., 347., 1.],
                    [246., 34., 249., 36., 1.],
                    [194., 55., 197., 58., 1.],
                    [22., 214., 26., 219., 1.],
                    [318., 162., 329., 170., 1.]])
          ]

model = MiniModel()
model = torch.nn.DataParallel(model)
model = model.cuda()

anchors = np.array([[0.01710912, 0.02316356],
                    [0.04235875, 0.04513844],
                    [0.05432437, 0.07076002],
                    [0.09045923, 0.09586145],
                    [0.15661931, 0.16021108]])
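# These anchors are normalized (max <= 1), so RegionLoss.forward scales them to
# grid-cell units (roughly 3.6 x 4.8 up to 32.6 x 33.3 cells on the 208x208 grid).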

criterion1 = RegionLoss(anchors=anchors,
                        n_classes=2,
                        coord_scale=1.0,
                        reduction=2,
                        noobject_scale=1.0,
                        object_scale=5.0,
                        class_scale=1.0,
                        thresh=0.6,
                        coord_prefill=12800)

model.train()
out1 = model(layer1, layer2, layer3, layer4, layer5)
loss, loss_dict = criterion1(out1, labels)
print(loss)
loss.backward()
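
# A minimal next step (illustrative, not in the original gist): wrap the forward
# and backward pass in an optimizer loop, e.g. SGD with placeholder hyper-parameters:
#   optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
#   optimizer.zero_grad()
#   loss, loss_dict = criterion1(model(layer1, layer2, layer3, layer4, layer5), labels)
#   loss.backward()
#   optimizer.step()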