import math
import torch
import torch.nn as nn
import numpy as np
from torch.utils.checkpoint import checkpoint


def convert_x1y1x2y2_to_xywh(box):
    """
    Convert boxes from corner format [x1, y1, x2, y2] to center format [cx, cy, w, h].
    :param box: (N, 4) tensor or numpy array.
    :return: (N, 4) tensor or numpy array.
    """
    cx = (box[:, 0] + box[:, 2]) / 2.
    cy = (box[:, 1] + box[:, 3]) / 2.
    cw = box[:, 2] - box[:, 0] + 1
    ch = box[:, 3] - box[:, 1] + 1
    if torch.is_tensor(box):
        return torch.cat((cx.view(-1, 1), cy.view(-1, 1), cw.view(-1, 1), ch.view(-1, 1)), 1)
    else:
        return np.concatenate((cx.reshape(-1, 1), cy.reshape(-1, 1), cw.reshape(-1, 1), ch.reshape(-1, 1)), 1)
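
# Illustrative usage of the conversion above (with its +1 pixel convention):
#   convert_x1y1x2y2_to_xywh(torch.tensor([[0., 0., 9., 9.]]))
#   # -> tensor([[ 4.5000,  4.5000, 10.0000, 10.0000]])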


class RegionLoss(nn.Module):
    def __init__(self, anchors, n_classes, coord_scale=1, reduction=32, noobject_scale=1,
                 object_scale=5, class_scale=1, thresh=0.6, coord_prefill=12800):
        super(RegionLoss, self).__init__()
        self.anchors = torch.from_numpy(anchors).float()
        self.coord_scale = coord_scale
        self.reduction = reduction
        self.noobject_scale = noobject_scale
        self.object_scale = object_scale
        self.class_scale = class_scale
        self.thresh = thresh
        self.coord_prefill = coord_prefill
        self.n_classes = n_classes
        self.register_buffer('seen', torch.tensor(0))

    def forward(self, output, target, seen=None):
        anchors = self.anchors.clone()
        nH = output.size(2)
        nW = output.size(3)
        if anchors.max() <= 1:
            # Anchors given in normalised [0, 1] units: scale width by the grid
            # width and height by the grid height.
            anchors[:, 0] *= nW
            anchors[:, 1] *= nH
        nB = output.size(0)
        nA = len(anchors)
        nC = self.n_classes
        nPixels = nH * nW

        if seen is not None:
            self.seen = torch.tensor(seen)
        elif self.training:
            self.seen += nB

        output = output.view(nB, nA, -1, nPixels)
        coord = torch.zeros_like(output[:, :, :4])
        coord[:, :, :2] = output[:, :, :2].sigmoid()  # tx, ty
        coord[:, :, 2:4] = output[:, :, 2:4]          # tw, th
        conf = output[:, :, 4].sigmoid()
        if nC > 1:
            cls = output[:, :, 5:].contiguous().view(nB * nA, nC, nPixels).transpose(1, 2).contiguous().view(-1, nC)

        # Create prediction boxes
        pred_boxes = torch.FloatTensor(nB * nA * nPixels, 4)
        lin_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).view(nPixels)
        lin_y = torch.linspace(0, nH - 1, nH).view(nH, 1).repeat(1, nW).view(nPixels)
        anchor_w = anchors[:, 0].contiguous().view(nA, 1)
        anchor_h = anchors[:, 1].contiguous().view(nA, 1)

        coord_ = coord.clone()
        if coord_.is_cuda:
            coord_ = coord_.cpu()
        pred_boxes[:, 0] = (coord_[:, :, 0].detach() + lin_x).view(-1)
        pred_boxes[:, 1] = (coord_[:, :, 1].detach() + lin_y).view(-1)
        pred_boxes[:, 2] = (coord_[:, :, 2].detach().exp() * anchor_w).view(-1)
        pred_boxes[:, 3] = (coord_[:, :, 3].detach().exp() * anchor_h).view(-1)
        pred_boxes = pred_boxes.cpu()

        coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = self.build_targets(pred_boxes, target, nH, nW, anchors)
        coord_mask = coord_mask.expand_as(tcoord).sqrt()
        conf_mask = conf_mask.sqrt()
        if nC > 1:
            tcls = tcls[cls_mask].view(-1).long()
            cls_mask = cls_mask.view(-1, 1).repeat(1, nC)
            cls = cls[cls_mask].view(-1, nC)

        if coord.is_cuda:
            tcoord = tcoord.cuda()
            tconf = tconf.cuda()
            tcls = tcls.cuda()
            coord_mask = coord_mask.cuda()
            conf_mask = conf_mask.cuda()

        # Compute losses
        mse = nn.MSELoss(reduction='sum')
        self.loss_coord = self.coord_scale * mse(coord * coord_mask, tcoord * coord_mask) / nB
        self.loss_conf = mse(conf * conf_mask, tconf * conf_mask) / nB
        if nC > 1:
            self.loss_cls = self.class_scale * 2 * nn.CrossEntropyLoss(reduction='sum')(cls, tcls) / nB
            self.loss_tot = self.loss_coord + self.loss_conf + self.loss_cls
        else:
            self.loss_cls = None
            self.loss_tot = self.loss_coord + self.loss_conf

        loss_dict = {}
        loss_dict['coord'] = self.loss_coord.item()
        loss_dict['conf'] = self.loss_conf.item()
        if self.loss_cls is not None:
            loss_dict['cls'] = self.loss_cls.item()
        return self.loss_tot, loss_dict

    def build_targets(self, pred_boxes, target, nH, nW, anchors):
        nB = len(target)
        nA = len(anchors)
        nAnchors = nA * nH * nW
        nPixels = nH * nW

        conf_mask = torch.ones(nB, nA, nPixels, requires_grad=False) * self.noobject_scale
        coord_mask = torch.zeros(nB, nA, 1, nPixels, requires_grad=False)
        cls_mask = torch.zeros(nB, nA, nPixels, requires_grad=False).bool()
        tcoord = torch.zeros(nB, nA, 4, nPixels, requires_grad=False)
        tconf = torch.zeros(nB, nA, nPixels, requires_grad=False)
        tcls = torch.zeros(nB, nA, nPixels, requires_grad=False)

        if self.seen < self.coord_prefill:
            # Early in training, push every predicted box towards the centre of its cell.
            coord_mask.fill_(1)
            tcoord[:, :, 0].fill_(0.5)
            tcoord[:, :, 1].fill_(0.5)

        for b in range(nB):
            gt = target[b][target[b][:, -1] > -1]
            if gt.shape[0] == 0:
                continue
            gt = torch.from_numpy(gt).float()
            cur_pred_boxes = pred_boxes[b * nAnchors: (b + 1) * nAnchors]
            anchors_ = torch.cat([torch.zeros_like(anchors), anchors], 1)
            gt_box = gt[:, :4]
            gt_cls = gt[:, -1]
            gt_wh = gt_box / self.reduction
            gt_wh = convert_x1y1x2y2_to_xywh(gt_wh)

            # Do not penalise confidence for predictions that already overlap a ground truth.
            iou_gt_pred = bbox_ious(gt_wh, cur_pred_boxes)
            mask = (iou_gt_pred > self.thresh).sum(0) >= 1
            conf_mask[b][mask.view_as(conf_mask[b])] = 0

            # Match each ground truth to its best-fitting anchor by width/height IoU.
            gt_wh_ = gt_wh.clone()
            gt_wh_[:, :2] = 0
            iou_gt_anchors = bbox_ious(gt_wh_, anchors_)
            _, best_anchors = iou_gt_anchors.max(1)

            gt_size = gt.size(0)
            for i in range(gt_size):
                gi = min(nW - 1, max(0, int(gt_wh[i, 0])))
                gj = min(nH - 1, max(0, int(gt_wh[i, 1])))
                best_n = best_anchors[i]
                iou = iou_gt_pred[i][best_n * nPixels + gj * nW + gi]
                coord_mask[b][best_n][0][gj * nW + gi] = 2 - (gt_wh[i, 2] * gt_wh[i, 3]) / nPixels
                cls_mask[b][best_n][gj * nW + gi] = 1
                conf_mask[b][best_n][gj * nW + gi] = self.object_scale
                tcoord[b][best_n][0][gj * nW + gi] = gt_wh[i, 0] - gi
                tcoord[b][best_n][1][gj * nW + gi] = gt_wh[i, 1] - gj
                tcoord[b][best_n][2][gj * nW + gi] = math.log(gt_wh[i, 2] / anchors[best_n, 0])
                tcoord[b][best_n][3][gj * nW + gi] = math.log(gt_wh[i, 3] / anchors[best_n, 1])
                tconf[b][best_n][gj * nW + gi] = iou
                tcls[b][best_n][gj * nW + gi] = gt_cls[i]

        return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls


def bbox_ious(boxes1, boxes2):
    """ Compute IoU between all boxes from ``boxes1`` with all boxes from ``boxes2``.
    Args:
        boxes1 (torch.Tensor): List of bounding boxes
        boxes2 (torch.Tensor): List of bounding boxes
    Note:
        List format: [[xc, yc, w, h],...]
    """
    b1x1, b1y1 = (boxes1[:, :2] - (boxes1[:, 2:4] / 2)).split(1, 1)
    b1x2, b1y2 = (boxes1[:, :2] + (boxes1[:, 2:4] / 2)).split(1, 1)
    b2x1, b2y1 = (boxes2[:, :2] - (boxes2[:, 2:4] / 2)).split(1, 1)
    b2x2, b2y2 = (boxes2[:, :2] + (boxes2[:, 2:4] / 2)).split(1, 1)

    dx = (b1x2.min(b2x2.t()) - b1x1.max(b2x1.t())).clamp(min=0)
    dy = (b1y2.min(b2y2.t()) - b1y1.max(b2y1.t())).clamp(min=0)
    intersections = dx * dy

    areas1 = (b1x2 - b1x1) * (b1y2 - b1y1)
    areas2 = (b2x2 - b2x1) * (b2y2 - b2y1)
    unions = (areas1 + areas2.t()) - intersections
    return intersections / unions
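
# Illustrative usage of bbox_ious (center-format boxes): a box compared with itself
# has IoU 1, and with a disjoint box IoU 0, e.g.
#   a = torch.tensor([[5., 5., 4., 4.]])
#   b = torch.tensor([[5., 5., 4., 4.], [20., 20., 2., 2.]])
#   bbox_ious(a, b)  # -> tensor([[1., 0.]])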


class Conv_1x1(nn.Module):
    def __init__(self, in_planes, out_planes):
        super(Conv_1x1, self).__init__()
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1)
        self.bn = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class MiniModel(nn.Module):
    def __init__(self):
        super(MiniModel, self).__init__()
        dim = 256
        self.up5 = nn.Upsample(scale_factor=8, mode='nearest')
        self.up4 = nn.Upsample(scale_factor=8, mode='nearest')
        self.up3 = nn.Upsample(scale_factor=4, mode='nearest')
        self.up2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.layer1_1 = Conv_1x1(64, dim)
        self.layer2_1 = Conv_1x1(256, dim)
        self.layer3_1 = Conv_1x1(512, dim)
        self.layer4_1 = Conv_1x1(1024, dim)
        self.layer5_1 = Conv_1x1(1024, dim)
        self.out1 = nn.Conv2d(1280, 35, kernel_size=1)

    def forward(self, layer1, layer2, layer3, layer4, layer5):
        # Checkpoint the 1x1 reduction convs to trade recomputation for activation memory.
        n1 = checkpoint(self.layer1_1, layer1)
        n2 = checkpoint(self.layer2_1, layer2)
        n3 = checkpoint(self.layer3_1, layer3)
        n4 = checkpoint(self.layer4_1, layer4)
        n5 = checkpoint(self.layer5_1, layer5)
        layer2 = self.up2(n2)
        layer3 = self.up3(n3)
        layer4 = self.up4(n4)
        layer5 = self.up5(n5)
        x = torch.cat([n1, layer2, layer3, layer4, layer5], 1)
        out1 = self.out1(x)
        return out1
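

# Note: torch.utils.checkpoint.checkpoint recomputes the wrapped module during the
# backward pass instead of storing its activations. Gradients only flow through a
# checkpointed segment if at least one of its inputs requires grad, which is why the
# dummy feature maps below are created with requires_grad=True.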
layer1 = torch.randn(2, 64, 208, 208, requires_grad=True).cuda()
layer2 = torch.randn(2, 256, 104, 104, requires_grad=True).cuda()
layer3 = torch.randn(2, 512, 52, 52, requires_grad=True).cuda()
layer4 = torch.randn(2, 1024, 26, 26, requires_grad=True).cuda()
layer5 = torch.randn(2, 1024, 26, 26, requires_grad=True).cuda()
labels = [np.array([[8., 159., 20., 178., 1.],
                    [354., 275., 373., 292., 1.],
                    [324., 4., 335., 20., 1.],
                    [286., 257., 307., 283., 1.],
                    [163., 11., 178., 38., 1.],
                    [221., 19., 234., 44., 1.]]),
          np.array([[355., 26., 359., 29., 1.],
                    [357., 345., 363., 353., 1.],
                    [291., 11., 295., 14., 1.],
                    [146., 261., 149., 263., 1.],
                    [356., 6., 368., 18., 1.],
                    [223., 220., 230., 224., 1.],
                    [314., 177., 317., 180., 1.],
                    [146., 263., 149., 266., 1.],
                    [14., 272., 22., 278., 1.],
                    [301., 343., 304., 347., 1.],
                    [246., 34., 249., 36., 1.],
                    [194., 55., 197., 58., 1.],
                    [22., 214., 26., 219., 1.],
                    [318., 162., 329., 170., 1.]])
          ]
model = MiniModel()
model = torch.nn.DataParallel(model)
model = model.cuda()
anchors = np.array([[0.01710912, 0.02316356],
                    [0.04235875, 0.04513844],
                    [0.05432437, 0.07076002],
                    [0.09045923, 0.09586145],
                    [0.15661931, 0.16021108]])
criterion1 = RegionLoss(anchors=anchors,
                        n_classes=2,
                        coord_scale=1.0,
                        reduction=2,
                        noobject_scale=1.0,
                        object_scale=5.0,
                        class_scale=1.0,
                        thresh=0.6,
                        coord_prefill=12800)
model.train()
out1 = model(layer1, layer2, layer3, layer4, layer5)
loss, loss_dict = criterion1(out1, labels)
print(loss)
loss.backward()
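
# In a real training loop the backward pass would typically be followed by an optimiser
# step; a minimal sketch (the optimiser and learning rate here are illustrative choices,
# not part of the original gist):
#   optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
#   optimizer.zero_grad()
#   loss.backward()
#   optimizer.step()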