CS4243 PyTorch Snippets
import torch
import torch.nn as nn
import torch.nn.functional as F

"""
Creating tensors
"""
a = torch.rand(...)                     # uniform [0, 1) tensor of the given shape, returns a torch.Tensor
b = torch.LongTensor(10).random_(0, 2)  # 10-dim vector of random integers in {0, 1}
""" | |
Network template | |
""" | |
class Network(nn.Module): | |
def __init__(self): | |
super().__init__() | |
pass | |
def forward(self, x): | |
pass | |
""" | |
batch training loop | |
""" | |
for epoch in in range(EPOCHS): | |
num_batches = 0 | |
shuffled_indices=torch.randperm(60000) | |
running_loss = 0 | |
for i in range(0, DATASETSIZE, BATCHSIZE): | |
idx = shuffled_indices[count:count+bs] | |
idx = torch.LongTensor(DATASETSIZE).random_(BATCHSIZE) | |
minibatch_data = train_data[idx] | |
minibatch_labels = trian_labels[idx] | |
inputs = minibatch_data.view(bs, INPUTSIZE) | |
inputs.requires_grad_() | |
pred = model(inputs) | |
loss = criteron(pred, minibatch_labels) | |
running_loss = loss.detach().item() | |
num_batches += 1 | |
epoch_loss = running_loss / num_batches | |
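The loop above assumes a handful of names defined elsewhere in the notebook. A minimal sketch of that setup (all values here are illustrative placeholders, not from the original gist):

EPOCHS = 10
DATASETSIZE = 60000   # e.g., the MNIST training set
BATCHSIZE = 128
INPUTSIZE = 784       # 28 * 28 flattened

model = Network()     # any nn.Module following the template above
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)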
""" | |
Testing model | |
""" | |
def eval_on_test_set(model, test_data, test_label): | |
running_error=0 | |
num_batches=0 | |
for i in range(0,DATASETSIZE, BATCHSIZE): | |
inputs = test_data[i:i+BATCHSIZE].unsqueeze(dim=1) | |
minibatch_label = test_label[i:i+BATCHSIZE] | |
scores = model(inputs) | |
error = utils.get_error( scores , minibatch_label) | |
running_error += error.item() | |
num_batches+=1 | |
total_error = running_error/num_batches | |
print( 'error rate on test set =', total_error*100 ,'percent') | |
def get_accuracy(scores, labels):
    # use within the batched training loop to get the batch accuracy
    num_data = scores.size(0)
    predicted_labels = scores.argmax(dim=1)
    indicator = (predicted_labels == labels)
    num_correct = indicator.sum()
    accuracy = 100 * num_correct.float() / num_data
    return accuracy
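For example, inside the batch loop above, right after computing the loss:

acc = get_accuracy(pred, minibatch_labels)  # percentage of correct predictions in the batch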
""" | |
One-hot encoding | |
""" | |
def index_to_onehot(labels, num_classes=10): | |
""" | |
convert index label to one hot labels | |
Inputs: | |
labels: Integer Tensor of length N, e.g., [0, 1, 2, 4, 3] | |
num_classes: the number of classes, e.g., 5 | |
Output: | |
Tensor: onehot_labels of size [N, num_classes] | |
a matrix that contains one-hot label for each sample: | |
e.g., [ | |
[1, 0, 0, 0, 0], | |
[0, 1, 0, 0, 0], | |
[0, 0, 1, 0, 0], | |
[0, 0, 0, 0, 1], | |
[0, 0, 0, 1, 0] | |
] | |
""" | |
num_samples = len(labels) | |
onehot = torch.zeros(num_samples, num_classes) | |
onehot[torch.arange(num_samples), labels] = 1 | |
return onehot | |
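A quick usage check (example values matching the docstring):

labels = torch.tensor([0, 1, 2, 4, 3])
onehot = index_to_onehot(labels, num_classes=5)
print(onehot.shape)  # torch.Size([5, 5])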
""" | |
Soft-label CrossEntropy | |
Only when final layer does not contain Softmax | |
""" | |
score = net(x) | |
prob = torch.softmax(score, dim=-1) | |
loss = -(prob.log() * y).sum(dim=-1).mean() |
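For numerical stability, log_softmax avoids computing prob.log() explicitly; with one-hot targets this reduces to the built-in cross-entropy. A sketch under the same assumptions:

log_prob = F.log_softmax(score, dim=-1)
loss = -(log_prob * y).sum(dim=-1).mean()
# with one-hot y this matches F.cross_entropy(score, y.argmax(dim=-1))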
Non-max Suppression

def nms(dets, thresh):
    '''
    dets is a torch tensor of shape [num_dets, 6] holding 3-D boxes
    as (x1, y1, z1, x2, y2, z2). The detections are assumed to be
    sorted by score in descending order, so they can be used directly.
    '''
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    z1 = dets[:, 2]
    x2 = dets[:, 3]
    y2 = dets[:, 4]
    z2 = dets[:, 5]

    volume = (x2 - x1 + 1) * (y2 - y1 + 1) * (z2 - z1 + 1)
    order = torch.arange(dets.size(0))  # boxes are already in sorted order

    keep = []
    while order.size(0) > 0:
        i = order[0]  # pick the highest-scoring remaining box
        keep.append(i.item())

        # intersection of box i with all remaining boxes:
        # near corner is the max of the near corners, far corner the min of the far corners
        xx1 = torch.max(x1[i], x1[order[1:]])
        yy1 = torch.max(y1[i], y1[order[1:]])
        zz1 = torch.max(z1[i], z1[order[1:]])
        xx2 = torch.min(x2[i], x2[order[1:]])
        yy2 = torch.min(y2[i], y2[order[1:]])
        zz2 = torch.min(z2[i], z2[order[1:]])

        w = torch.clamp(xx2 - xx1 + 1, min=0)  # intersection width
        h = torch.clamp(yy2 - yy1 + 1, min=0)  # intersection height
        l = torch.clamp(zz2 - zz1 + 1, min=0)  # intersection length
        inter = w * h * l
        ovr = inter.float() / (volume[i] + volume[order[1:]] - inter).float()  # IoU

        # keep only boxes whose overlap with box i is below the threshold;
        # the +1 offsets the indices because ovr was computed over order[1:]
        inds = torch.where(ovr <= thresh)[0]
        order = order[inds + 1]
    return keep
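A quick smoke test with three dummy boxes (values are illustrative):

dets = torch.tensor([[ 0.,  0.,  0., 10., 10., 10.],   # highest-scoring box first
                     [ 1.,  1.,  1., 11., 11., 11.],   # heavy overlap --> suppressed
                     [20., 20., 20., 30., 30., 30.]])  # disjoint --> kept
print(nms(dets, thresh=0.5))  # [0, 2]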
def non_maximum_suppression(self, boxes):
    # per-class NMS over `boxes`, each carrying per-class scores in box.classes
    # (requires numpy imported as np)
    if len(boxes) > 0:
        nb_class = len(boxes[0].classes)
    else:
        return boxes

    for c in range(nb_class):
        sorted_indices = np.argsort([-box.classes[c] for box in boxes])
        for i in range(len(sorted_indices)):
            index_i = sorted_indices[i]
            if boxes[index_i].classes[c] == 0:
                continue
            for j in range(i + 1, len(sorted_indices)):
                index_j = sorted_indices[j]
                if self.bbox_iou(boxes[index_i], boxes[index_j]) >= self.nms_threshold:
                    boxes[index_j].classes[c] = 0  # suppress the lower-scoring box for this class
    return boxes
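The method assumes a self.bbox_iou helper, which is not defined in this gist. A minimal 2-D sketch, assuming each box exposes xmin/ymin/xmax/ymax attributes (hypothetical names):

def bbox_iou(self, box1, box2):
    # overlap extents along each axis, clamped at zero
    ix = max(0.0, min(box1.xmax, box2.xmax) - max(box1.xmin, box2.xmin))
    iy = max(0.0, min(box1.ymax, box2.ymax) - max(box1.ymin, box2.ymin))
    inter = ix * iy
    area1 = (box1.xmax - box1.xmin) * (box1.ymax - box1.ymin)
    area2 = (box2.xmax - box2.xmin) * (box2.ymax - box2.ymin)
    return inter / (area1 + area2 - inter)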
LeNet architecture

class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        # CL1: 1 x 28 x 28 --> 50 x 28 x 28
        self.conv1 = nn.Conv2d(1, 50, kernel_size=3, padding=1)
        # MP1: 50 x 28 x 28 --> 50 x 14 x 14
        self.pool1 = nn.MaxPool2d(2, 2)
        # CL2: 50 x 14 x 14 --> 100 x 14 x 14
        self.conv2 = nn.Conv2d(50, 100, kernel_size=3, padding=1)
        # MP2: 100 x 14 x 14 --> 100 x 7 x 7
        self.pool2 = nn.MaxPool2d(2, 2)
        # LL1: 100 x 7 x 7 = 4900 --> 100
        self.linear1 = nn.Linear(4900, 100)
        # LL2: 100 --> 10
        self.linear2 = nn.Linear(100, 10)

    def forward(self, x):
        # CL1: 1 x 28 x 28 --> 50 x 28 x 28
        x = self.conv1(x)
        x = torch.relu(x)
        # MP1: 50 x 28 x 28 --> 50 x 14 x 14
        x = self.pool1(x)
        # CL2: 50 x 14 x 14 --> 100 x 14 x 14
        x = self.conv2(x)
        x = torch.relu(x)
        # MP2: 100 x 14 x 14 --> 100 x 7 x 7
        x = self.pool2(x)
        # LL1: 100 x 7 x 7 = 4900 --> 100
        x = x.view(-1, 4900)
        x = self.linear1(x)
        x = torch.relu(x)
        # LL2: 100 --> 10
        x = self.linear2(x)
        return x
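A shape sanity check on a dummy MNIST-sized batch:

net = LeNet5()
x = torch.rand(4, 1, 28, 28)  # batch of 4 grayscale 28x28 images
print(net(x).shape)           # torch.Size([4, 10])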
Faster R-CNN architecture

class FasterRCNN(nn.Module):
    def __init__(self, dim, offset, obj_size, n_objects, n_object_classes):
        super().__init__()
        self.dim = dim
        self.offset = offset
        self.obj_size = obj_size
        self.n_objects = n_objects
        self.conv1 = nn.Conv2d(1, dim, (3,3), padding=1)    # 1x28x28 -> dx28x28
        self.conv2 = nn.Conv2d(dim, dim, (3,3), padding=1)  # dx28x28 -> dx28x28
        self.classifier_head = nn.Linear(dim * obj_size ** 2, n_object_classes)  # dx7x7 -> n
        # built-in region proposal network
        self.conv_anchor = nn.Conv2d(dim, 1, (obj_size, obj_size), padding=offset)  # dx28x28 -> 1x28x28

    def forward(self, x, bb, training=True):
        bs, c, h, w = x.shape
        dim, offset, obj_size = self.dim, self.offset, self.obj_size

        x = self.conv1(x)  # [bs, dim, 28, 28]
        x = torch.relu(x)
        x = self.conv2(x)  # [bs, dim, 28, 28]
        x = torch.relu(x)

        scores_bbox_anch = self.conv_anchor(x).squeeze(1)  # [bs, 28, 28]

        if training:
            # cut boxes around the ground-truth centres bb[b, k] = (x, y)
            bbox = []
            for b in range(bs):
                for k in range(self.n_objects):
                    y0 = bb[b, k, 1].long() - offset
                    x0 = bb[b, k, 0].long() - offset
                    bbox.append(x[b, :, y0:y0 + obj_size, x0:x0 + obj_size])
            bbox = torch.stack(bbox, dim=0)            # [bs * n_objects, dim, obj_size, obj_size]
            bbox = bbox.view(-1, dim * obj_size ** 2)  # [bs * n_objects, dim * obj_size * obj_size]
            scores_bbox_class = self.classifier_head(bbox)  # [bs * n_objects, n_object_classes]
        else:
            batch_bbox = []
            for b in range(bs):
                # coordinates of the top-K anchor scores (K = n_objects)
                scores_b = scores_bbox_anch[b].view(-1)  # [h * w]
                _, idx_largest = torch.sort(scores_b, descending=True)
                idx_largest = idx_largest[:self.n_objects]
                idx_y = idx_largest // w             # [n_objects]
                idx_x = idx_largest - idx_y * w      # [n_objects]
                # extract the top-K boxes: [n_objects, dim, obj_size, obj_size]
                bbox = []
                for k in range(self.n_objects):
                    bbox.append(x[b, :, idx_y[k] - offset:idx_y[k] - offset + obj_size,
                                        idx_x[k] - offset:idx_x[k] - offset + obj_size])
                bbox = torch.stack(bbox, dim=0)
                bbox = bbox.view(-1, dim * obj_size ** 2)  # [n_objects, dim * obj_size * obj_size]
                batch_bbox.append(bbox)
            # class scores for all boxes: [bs * n_objects, n_object_classes]
            batch_bbox = torch.cat(batch_bbox, 0)
            scores_bbox_class = self.classifier_head(batch_bbox)
        return scores_bbox_class, scores_bbox_anch
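A hedged instantiation for the 28x28 toy setting the comments assume (hyperparameter values are illustrative):

net = FasterRCNN(dim=32, offset=3, obj_size=7, n_objects=2, n_object_classes=10)
x = torch.rand(4, 1, 28, 28)
bb = torch.randint(3, 25, (4, 2, 2))  # (x, y) centres of the 2 ground-truth boxes per image
scores_class, scores_anchor = net(x, bb, training=True)
print(scores_class.shape, scores_anchor.shape)  # [8, 10] and [4, 28, 28]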
Semantic Segmentation CNN architecture

class SemanticSegmentCNN(nn.Module):
    def __init__(self, dim, nb_pixel_classes):
        super(SemanticSegmentCNN, self).__init__()
        # downsampling convnet
        self.conv1 = nn.Conv2d(1, dim, (3,3), padding=1, stride=2)    # 1 x 28 x 28 --> dim x 14 x 14
        self.conv2 = nn.Conv2d(dim, dim, (3,3), padding=1, stride=2)  # dim x 14 x 14 --> dim x 7 x 7
        # upsampling convnet
        self.trans_conv1 = nn.ConvTranspose2d(dim, dim, (4,4), padding=1, stride=2)  # dim x 7 x 7 --> dim x 14 x 14
        self.trans_conv2 = nn.ConvTranspose2d(dim, dim, (4,4), padding=1, stride=2)  # dim x 14 x 14 --> dim x 28 x 28
        # classification layer
        self.classifier_head = nn.Conv2d(dim, nb_pixel_classes, (3,3), padding=1, stride=1)  # dim x 28 x 28 --> nb_pixel_classes x 28 x 28

    def forward(self, x):
        # downsampling convnet
        x = self.conv1(x)        # [batch_size, dim, im_size/2, im_size/2]
        x = torch.relu(x)
        x = self.conv2(x)        # [batch_size, dim, im_size/4, im_size/4]
        x = torch.relu(x)
        # upsampling convnet
        x = self.trans_conv1(x)  # [batch_size, dim, im_size/2, im_size/2]
        x = torch.relu(x)
        x = self.trans_conv2(x)  # [batch_size, dim, im_size, im_size]
        x = torch.relu(x)
        # classification layer
        scores_pixel_class = self.classifier_head(x)  # [batch_size, nb_pixel_classes, im_size, im_size]
        return scores_pixel_class
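Pixel-wise training pairs these scores with nn.CrossEntropyLoss, which accepts [N, C, H, W] scores against [N, H, W] integer masks. A sketch with placeholder shapes:

net = SemanticSegmentCNN(dim=32, nb_pixel_classes=11)
x = torch.rand(4, 1, 28, 28)
masks = torch.randint(0, 11, (4, 28, 28))  # per-pixel class labels
scores = net(x)                            # [4, 11, 28, 28]
loss = nn.CrossEntropyLoss()(scores, masks)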
Bilinear Interpolation

import torch

dtype = torch.cuda.FloatTensor       # use torch.FloatTensor on CPU
dtype_long = torch.cuda.LongTensor   # use torch.LongTensor on CPU

def bilinear_interpolate_torch(im, x, y):
    # integer pixel coordinates surrounding (x, y)
    x0 = torch.floor(x).type(dtype_long)
    x1 = x0 + 1
    y0 = torch.floor(y).type(dtype_long)
    y1 = y0 + 1

    # clamp to the image bounds
    x0 = torch.clamp(x0, 0, im.shape[1] - 1)
    x1 = torch.clamp(x1, 0, im.shape[1] - 1)
    y0 = torch.clamp(y0, 0, im.shape[0] - 1)
    y1 = torch.clamp(y1, 0, im.shape[0] - 1)

    # values at the four surrounding pixels
    Ia = im[y0, x0][0]
    Ib = im[y1, x0][0]
    Ic = im[y0, x1][0]
    Id = im[y1, x1][0]

    # bilinear weights
    wa = (x1.type(dtype) - x) * (y1.type(dtype) - y)
    wb = (x1.type(dtype) - x) * (y - y0.type(dtype))
    wc = (x - x0.type(dtype)) * (y1.type(dtype) - y)
    wd = (x - x0.type(dtype)) * (y - y0.type(dtype))

    return torch.t(torch.t(Ia) * wa) + torch.t(torch.t(Ib) * wb) + torch.t(torch.t(Ic) * wc) + torch.t(torch.t(Id) * wd)
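Sampling one sub-pixel location from an H x W x C image (using the CPU variants of the dtypes above):

dtype = torch.FloatTensor
dtype_long = torch.LongTensor

im = torch.rand(28, 28, 3)            # H x W x C image
x = torch.tensor([10.5]).type(dtype)  # sub-pixel query coordinates
y = torch.tensor([7.25]).type(dtype)
print(bilinear_interpolate_torch(im, x, y))  # interpolated pixel value, shape [3]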
Cleaned up implementation of Fast R-CNN

class VanillaFastRCNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, object_size, classes, n_objects, im_size):
        super(VanillaFastRCNN, self).__init__()
        # metadata
        self.n_objects = n_objects
        self.offset = (object_size - 1) // 2
        self.object_size = object_size
        self.hidden_dim = hidden_dim
        self.im_size = im_size
        # backbone convnet
        self.conv1 = nn.Conv2d(input_dim, hidden_dim, kernel_size=5, stride=1, padding=2)   # same size
        self.conv2 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=5, stride=1, padding=2)  # same size
        self.conv3 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=5, stride=1, padding=2)  # same size
        self.activation = nn.functional.relu
        # per-region network predicting bbox pixel anchor scores
        self.conv_boundingbox = nn.Conv2d(hidden_dim, 1, kernel_size=object_size, stride=1, padding=self.offset)  # padded so the map keeps the image size
        # per-region network predicting the region class
        self.linear = nn.Linear(in_features=hidden_dim * object_size ** 2, out_features=classes)  # classify an object-size feature patch

    def forward(self, input_tensor, bounding_box_tensor, train_flag=True):
        offset = self.offset
        object_size = self.object_size

        # apply backbone convnet for feature extraction
        x = input_tensor
        x = self.conv1(x)
        x = self.activation(x)
        x = self.conv2(x)
        x = self.activation(x)
        x = self.conv3(x)
        x = self.activation(x)

        # predict bounding box anchors
        scores_boundingbox = self.conv_boundingbox(x).squeeze(1)

        # predict classes for each given bounding box
        batches, c, h, w = input_tensor.shape
        if train_flag:
            boxes = []
            for b in range(batches):             # for each image in the batch
                for k in range(self.n_objects):  # for each of the n objects
                    horizontal_left = bounding_box_tensor[b, k, 0].long() - offset
                    vertical_down = bounding_box_tensor[b, k, 1].long() - offset
                    boxes.append(x[b, :, vertical_down:vertical_down + object_size,
                                        horizontal_left:horizontal_left + object_size])  # cut out boxes of size object_size^2
            boxes = torch.stack(boxes, dim=0)  # (batch_size * n_objects) x hidden_dim x object_size x object_size
            boxes = boxes.view(-1, self.hidden_dim * object_size * object_size)  # reshape for the linear classifier
            scores_boxes = self.linear(boxes)
        else:
            total_boxes = []
            for b in range(batches):
                # get the top n_objects box centres by flattening the score map and sorting
                scores_b = scores_boundingbox[b].view(-1)   # to im_size * im_size
                _, idx_largest = torch.sort(scores_b, descending=True)
                idx_largest = idx_largest[:self.n_objects]  # take the top n_objects points as centres
                idx_y = idx_largest // self.im_size         # recover (y, x) coordinates
                idx_x = idx_largest - idx_y * self.im_size
                # cut out the region around each predicted centre, as in training
                boxes = []
                for k in range(self.n_objects):
                    horizontal_left = idx_x[k] - offset
                    vertical_down = idx_y[k] - offset
                    boxes.append(x[b, :, vertical_down:vertical_down + object_size,
                                        horizontal_left:horizontal_left + object_size])
                boxes = torch.stack(boxes, dim=0)
                boxes = boxes.view(-1, self.hidden_dim * object_size * object_size)
                total_boxes.append(boxes)
            # classify the whole batch at once
            total_boxes = torch.cat(total_boxes, dim=0)  # list to tensor
            scores_boxes = self.linear(total_boxes)
        return scores_boxes, scores_boundingbox
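Training would typically combine a per-pixel loss on the anchor map with a classification loss on the box scores. A hedged sketch (the target construction is a placeholder, not from the original):

net = VanillaFastRCNN(input_dim=1, hidden_dim=32, object_size=7, classes=10, n_objects=2, im_size=28)
x = torch.rand(4, 1, 28, 28)
bb = torch.randint(3, 25, (4, 2, 2))     # ground-truth (x, y) centres
anchor_target = torch.zeros(4, 28, 28)   # 1 at true centres, 0 elsewhere (left all-zero here)
box_labels = torch.randint(0, 10, (8,))  # class of each of the 4 * 2 boxes

scores_boxes, scores_anchor = net(x, bb, train_flag=True)
loss = (nn.BCEWithLogitsLoss()(scores_anchor, anchor_target)
        + nn.CrossEntropyLoss()(scores_boxes, box_labels))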
Count network parameters

def display_num_param(net):
    nb_param = 0
    for param in net.parameters():
        nb_param += param.numel()
    print('There are {} ({:.2f} million) parameters in this neural network'.format(
        nb_param, nb_param / 1e6)
    )
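For example, on the LeNet5 defined above:

display_num_param(LeNet5())  # counts all conv and linear weights and biases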
Mask R-CNN architecture

class MaskRCNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, object_size, classes, n_objects, im_size, nb_pixel_classes):
        super(MaskRCNN, self).__init__()
        # metadata
        self.n_objects = n_objects
        self.offset = (object_size - 1) // 2
        self.object_size = object_size
        self.hidden_dim = hidden_dim
        self.im_size = im_size
        # segmentation branch: downsampling convnet
        self.ss_conv1 = nn.Conv2d(1, hidden_dim, (3,3), padding=1, stride=2)           # 1 x 28 x 28 --> hidden_dim x 14 x 14
        self.ss_conv2 = nn.Conv2d(hidden_dim, hidden_dim, (3,3), padding=1, stride=2)  # hidden_dim x 14 x 14 --> hidden_dim x 7 x 7
        # segmentation branch: upsampling convnet
        self.ss_trans_conv1 = nn.ConvTranspose2d(hidden_dim, hidden_dim, (4,4), padding=1, stride=2)  # hidden_dim x 7 x 7 --> hidden_dim x 14 x 14
        self.ss_trans_conv2 = nn.ConvTranspose2d(hidden_dim, hidden_dim, (4,4), padding=1, stride=2)  # hidden_dim x 14 x 14 --> hidden_dim x 28 x 28
        # segmentation branch: per-pixel classification layer
        self.ss_classifier_head = nn.Conv2d(hidden_dim, nb_pixel_classes, (3,3), padding=1, stride=1)  # hidden_dim x 28 x 28 --> nb_pixel_classes x 28 x 28
        # detection backbone convnet
        self.conv1 = nn.Conv2d(input_dim, hidden_dim, kernel_size=5, stride=1, padding=2)   # same size
        self.conv2 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=5, stride=1, padding=2)  # same size
        self.conv3 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=5, stride=1, padding=2)  # same size
        self.activation = nn.functional.relu
        # per-region network predicting bbox pixel anchor scores
        self.conv_boundingbox = nn.Conv2d(hidden_dim, 1, kernel_size=object_size, stride=1, padding=self.offset)
        # per-region network predicting the region class
        self.linear = nn.Linear(in_features=hidden_dim * object_size ** 2, out_features=classes)

    def forward(self, input_tensor, bounding_box_tensor, train_flag=True):
        offset = self.offset
        object_size = self.object_size

        # apply detection backbone for feature extraction
        x = self.conv1(input_tensor)
        x = self.activation(x)
        x = self.conv2(x)
        x = self.activation(x)
        x = self.conv3(x)
        x = self.activation(x)

        # predict bounding box anchors
        scores_boundingbox = self.conv_boundingbox(x).squeeze(1)

        # predict classes for each bounding box (as in VanillaFastRCNN above)
        batches, c, h, w = input_tensor.shape
        if train_flag:
            boxes = []
            for b in range(batches):
                for k in range(self.n_objects):
                    horizontal_left = bounding_box_tensor[b, k, 0].long() - offset
                    vertical_down = bounding_box_tensor[b, k, 1].long() - offset
                    boxes.append(x[b, :, vertical_down:vertical_down + object_size,
                                        horizontal_left:horizontal_left + object_size])
            boxes = torch.stack(boxes, dim=0)
            boxes = boxes.view(-1, self.hidden_dim * object_size * object_size)
            scores_boxes = self.linear(boxes)
        else:
            total_boxes = []
            for b in range(batches):
                # top n_objects box centres from the flattened, sorted score map
                scores_b = scores_boundingbox[b].view(-1)
                _, idx_largest = torch.sort(scores_b, descending=True)
                idx_largest = idx_largest[:self.n_objects]
                idx_y = idx_largest // self.im_size
                idx_x = idx_largest - idx_y * self.im_size
                # cut out the region around each predicted centre, as in training
                boxes = []
                for k in range(self.n_objects):
                    horizontal_left = idx_x[k] - offset
                    vertical_down = idx_y[k] - offset
                    boxes.append(x[b, :, vertical_down:vertical_down + object_size,
                                        horizontal_left:horizontal_left + object_size])
                boxes = torch.stack(boxes, dim=0)
                boxes = boxes.view(-1, self.hidden_dim * object_size * object_size)
                total_boxes.append(boxes)
            total_boxes = torch.cat(total_boxes, dim=0)
            scores_boxes = self.linear(total_boxes)

        # segmentation branch on the raw input
        x = self.ss_conv1(input_tensor)  # [batch_size, hidden_dim, im_size/2, im_size/2]
        x = torch.relu(x)
        x = self.ss_conv2(x)             # [batch_size, hidden_dim, im_size/4, im_size/4]
        x = torch.relu(x)
        x = self.ss_trans_conv1(x)       # [batch_size, hidden_dim, im_size/2, im_size/2]
        x = torch.relu(x)
        x = self.ss_trans_conv2(x)       # [batch_size, hidden_dim, im_size, im_size]
        x = torch.relu(x)
        scores_pixel_class = self.ss_classifier_head(x)  # [batch_size, nb_pixel_classes, im_size, im_size]

        return scores_boxes, scores_boundingbox, scores_pixel_class
VGG architecture