CS4243 PyTorch Snippets
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
Creating tensors
"""
a = torch.rand(...) # returns a torch.Tensor
b = torch.LongTensor(10).random_(0, 2) # 10-dim vector of random integers in {0, 1}
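# A few other common constructors, for reference:
zeros = torch.zeros(3, 4)            # 3x4 tensor of zeros
seq = torch.arange(10)               # integers 0..9
gauss = torch.randn(3, 4)            # standard-normal entries
moved = zeros.to("cuda") if torch.cuda.is_available() else zeros  # move to GPU when available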
"""
Network template
"""
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        pass

    def forward(self, x):
        pass
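# A minimal concrete instance of the template above; the layer sizes are
# illustrative assumptions, not fixed by the course snippets:
class TwoLayerNet(nn.Module):
    def __init__(self, input_size=784, hidden_size=128, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return self.fc2(x)  # raw class scores; pair with nn.CrossEntropyLoss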
"""
batch training loop
"""
for epoch in in range(EPOCHS):
num_batches = 0
shuffled_indices=torch.randperm(60000)
running_loss = 0
for i in range(0, DATASETSIZE, BATCHSIZE):
idx = shuffled_indices[count:count+bs]
idx = torch.LongTensor(DATASETSIZE).random_(BATCHSIZE)
minibatch_data = train_data[idx]
minibatch_labels = trian_labels[idx]
inputs = minibatch_data.view(bs, INPUTSIZE)
inputs.requires_grad_()
pred = model(inputs)
loss = criteron(pred, minibatch_labels)
running_loss = loss.detach().item()
num_batches += 1
epoch_loss = running_loss / num_batches
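# The loop above assumes a setup along these lines; the concrete values are
# placeholders (MNIST-style sizes), not part of the original snippet:
EPOCHS = 10
DATASETSIZE = 60000
BATCHSIZE = 100
INPUTSIZE = 28 * 28

model = TwoLayerNet(INPUTSIZE, 128, 10)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)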
"""
Testing model
"""
def eval_on_test_set(model, test_data, test_label):
running_error=0
num_batches=0
for i in range(0,DATASETSIZE, BATCHSIZE):
inputs = test_data[i:i+BATCHSIZE].unsqueeze(dim=1)
minibatch_label = test_label[i:i+BATCHSIZE]
scores = model(inputs)
error = utils.get_error( scores , minibatch_label)
running_error += error.item()
num_batches+=1
total_error = running_error/num_batches
print( 'error rate on test set =', total_error*100 ,'percent')
def get_accuracy(scores, labels):
    # use within the batched training loop to get the batch accuracy
    num_data = scores.size(0)
    predicted_labels = scores.argmax(dim=1)
    indicator = (predicted_labels == labels)
    num_correct = indicator.sum()
    accuracy = 100 * num_correct.float() / num_data
    return accuracy
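# utils.get_error above comes from the course utilities and is not shown in this
# gist; a plausible stand-in, assuming it returns the misclassified fraction:
def get_error(scores, labels):
    predicted_labels = scores.argmax(dim=1)
    return (predicted_labels != labels).float().mean()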
"""
One-hot encoding
"""
def index_to_onehot(labels, num_classes=10):
"""
convert index label to one hot labels
Inputs:
labels: Integer Tensor of length N, e.g., [0, 1, 2, 4, 3]
num_classes: the number of classes, e.g., 5
Output:
Tensor: onehot_labels of size [N, num_classes]
a matrix that contains one-hot label for each sample:
e.g., [
[1, 0, 0, 0, 0],
[0, 1, 0, 0, 0],
[0, 0, 1, 0, 0],
[0, 0, 0, 0, 1],
[0, 0, 0, 1, 0]
]
"""
num_samples = len(labels)
onehot = torch.zeros(num_samples, num_classes)
onehot[torch.arange(num_samples), labels] = 1
return onehot
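# Usage example; F.one_hot is the built-in equivalent (it returns a LongTensor):
labels = torch.tensor([0, 1, 2, 4, 3])
onehot = index_to_onehot(labels, num_classes=5)
same = F.one_hot(labels, num_classes=5).float()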
"""
Soft-label CrossEntropy
Only when final layer does not contain Softmax
"""
score = net(x)
prob = torch.softmax(score, dim=-1)
loss = -(prob.log() * y).sum(dim=-1).mean()
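# Sanity check (a sketch with made-up scores): with one-hot targets this matches
# the built-in cross-entropy, which takes raw scores and integer labels:
score = torch.randn(4, 5)
y_idx = torch.tensor([0, 3, 1, 4])
y = index_to_onehot(y_idx, num_classes=5)
soft = -(torch.softmax(score, dim=-1).log() * y).sum(dim=-1).mean()
hard = F.cross_entropy(score, y_idx)
assert torch.allclose(soft, hard)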
Semantic Segmentation CNN architecture

class SemanticSegmentCNN(nn.Module):
    
    def __init__(self, dim, nb_pixel_classes):  # dim = hidden channel width
        super().__init__()

        # downsampling convnet
        self.conv1 = nn.Conv2d(1, dim, (3,3), padding=1, stride=2) #  1x28x28 --> hidden_dim x14x14
        self.conv2 = nn.Conv2d(dim, dim, (3,3), padding=1, stride=2) # hidden_dim x14x14 --> hidden_dim x7x7 

        # upsampling convnet
        self.trans_conv1 = nn.ConvTranspose2d(dim, dim, (4,4), padding=1, stride=2) #  hidden_dim x7x7 --> hidden_dim x14x14
        self.trans_conv2 = nn.ConvTranspose2d(dim, dim, (4,4), padding=1, stride=2) #  hidden_dim x14x14 --> hidden_dim x28x28

        # classification layer
        self.classifier_head = nn.Conv2d(dim, nb_pixel_classes, (3,3), padding=1, stride=1) #  hidden_dim x28x28 --> nb_pixel_classes x28x28
        
    def forward(self, x): 
        # downsampling convnet
        x = self.conv1(x) # [batch_size, hidden_dim, im_size/2, im_size/2] 
        x = torch.relu(x)
        x = self.conv2(x) # [batch_size, hidden_dim, im_size/4, im_size/4] 
        x = torch.relu(x) 
        
        # upsampling convnet
        x = self.trans_conv1(x) # [batch_size, hidden_dim, im_size/2, im_size/2] 
        x = torch.relu(x)
        x = self.trans_conv2(x) # [batch_size, hidden_dim, im_size, im_size] 
        x = torch.relu(x) 

        # classification layer
        scores_pixel_class = self.classifier_head(x) # [batch_size, nb_pixel_classes, im_size, im_size] 
        
        return scores_pixel_class
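A minimal usage sketch, assuming MNIST-sized inputs (per the 28x28 shapes in the comments) and hyperparameters chosen here for illustration:

import torch
import torch.nn.functional as F

net = SemanticSegmentCNN(dim=64, nb_pixel_classes=11)
images = torch.rand(8, 1, 28, 28)            # dummy batch
targets = torch.randint(0, 11, (8, 28, 28))  # per-pixel class labels
scores = net(images)                         # [8, 11, 28, 28]
loss = F.cross_entropy(scores, targets)      # per-pixel cross-entropy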

Bilinear Interpolation

import torch

dtype = torch.cuda.FloatTensor      # assumes a CUDA device; use torch.FloatTensor on CPU
dtype_long = torch.cuda.LongTensor

def bilinear_interpolate_torch(im, x, y):
    # im: [H, W, C] image tensor; x, y: 1-D tensors of sampling coordinates
    x0 = torch.floor(x).type(dtype_long)
    x1 = x0 + 1

    y0 = torch.floor(y).type(dtype_long)
    y1 = y0 + 1

    # clamp the four neighbouring pixel coordinates to the image bounds
    x0 = torch.clamp(x0, 0, im.shape[1]-1)
    x1 = torch.clamp(x1, 0, im.shape[1]-1)
    y0 = torch.clamp(y0, 0, im.shape[0]-1)
    y1 = torch.clamp(y1, 0, im.shape[0]-1)

    # gather the four corner pixel values
    Ia = im[y0, x0][0]
    Ib = im[y1, x0][0]
    Ic = im[y0, x1][0]
    Id = im[y1, x1][0]

    # bilinear weights: each corner is weighted by the area of the opposite sub-rectangle
    wa = (x1.type(dtype)-x) * (y1.type(dtype)-y)
    wb = (x1.type(dtype)-x) * (y-y0.type(dtype))
    wc = (x-x0.type(dtype)) * (y1.type(dtype)-y)
    wd = (x-x0.type(dtype)) * (y-y0.type(dtype))

    return torch.t(torch.t(Ia)*wa) + torch.t(torch.t(Ib)*wb) + torch.t(torch.t(Ic)*wc) + torch.t(torch.t(Id)*wd)
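A usage sketch (the coordinates are arbitrary; a CUDA device is assumed, per the dtypes above):

im = torch.rand(28, 28, 3).cuda()             # H x W x C image
x = torch.cuda.FloatTensor([10.5])            # sample point between pixel columns
y = torch.cuda.FloatTensor([3.25])
value = bilinear_interpolate_torch(im, x, y)  # interpolated C-vector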

Cleaned up implementation of Fast R-CNN (contributed by @tanyjnaaman)

class VanillaFastRCNN(nn.Module):

    def __init__(self, input_dim, hidden_dim, object_size, classes, n_objects, im_size):
        super(VanillaFastRCNN, self).__init__()

        # metadata
        self.n_objects = n_objects
        self.offset = (object_size - 1)// 2
        self.object_size = object_size
        self.hidden_dim = hidden_dim
        self.im_size = im_size

        # backbone convnet
        self.conv1 = nn.Conv2d(input_dim, hidden_dim, kernel_size = 5, stride = 1, padding = 2) # same size
        self.conv2 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size = 5, stride = 1, padding = 2) # same size
        self.conv3 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size = 5, stride = 1, padding = 2) # same size
        self.activation = nn.functional.relu
        
        # per region network, predicting bbox pixel anchor scores
        self.conv_boundingbox = nn.Conv2d(hidden_dim, 1, kernel_size = object_size, stride = 1, padding = self.offset) # activation map padded to detect object size

        # per region network, predicting region class
        self.linear = nn.Linear(in_features = hidden_dim * object_size **2, out_features = classes) # take object-size feature map and classify

    def forward(self, input_tensor, bounding_box_tensor, train_flag = True):

        # apply backbone convnet for feature extraction
        x = input_tensor
        x = self.conv1(x)
        x = self.activation(x)
        x = self.conv2(x)
        x = self.activation(x)
        x = self.conv3(x)
        x = self.activation(x)

        # predict bounding box anchors
        scores_boundingbox = self.conv_boundingbox(x).squeeze()

        # predict classes for each given bounding box
        batches, c, h, w = input_tensor.shape
        if train_flag:
            boxes = []
            for b in range(batches): # for each image in batch
                for k in range(self.n_objects): # for n objects to be predicted
                    offset = self.offset
                    object_size = self.object_size
                    horizontal_left = bounding_box_tensor[b, k, 0].long() - offset
                    vertical_down = bounding_box_tensor[b, k, 1].long() - offset
                    boxes.append(x[b,:,vertical_down:vertical_down + object_size, horizontal_left:horizontal_left + object_size]) # cut out boxes of size object_size^2
            boxes = torch.stack(boxes, dim = 0) # stack to get tensor of (batch_size * n_objects) * hidden_dim * object_size * object_size
            boxes = boxes.view(-1, self.hidden_dim * object_size * object_size) # reshape for classification with linear layer
            scores_boxes = self.linear(boxes)

        else:
            total_boxes = []
            for b in range(batches):

                # get top n_objects box centres by flattening this image's score map, then sorting
                scores_flat = scores_boundingbox[b].view(-1) # to im_size * im_size
                _, idx_largest = torch.sort(scores_flat, descending = True)
                idx_largest = idx_largest[:self.n_objects] # take top n_objects points as centres
                idx_y = idx_largest // self.im_size  # recover (y, x) coordinates from the flat index
                idx_x = idx_largest - idx_y * self.im_size

                # cut out the region around each predicted centre and append it to the list of boxes, as in training
                boxes = []
                for k in range(self.n_objects): # for n objects to be predicted
                    offset = self.offset
                    object_size = self.object_size
                    horizontal_left = torch.clamp(idx_x[k] - offset, 0, self.im_size - object_size) # keep the crop inside the image
                    vertical_down = torch.clamp(idx_y[k] - offset, 0, self.im_size - object_size)
                    boxes.append(x[b,:,vertical_down:vertical_down + object_size, horizontal_left:horizontal_left + object_size]) # cut out boxes of size object_size^2
                boxes = torch.stack(boxes, dim = 0) # stack to get tensor of (batch_size * n_objects) * hidden_dim * object_size * object_size
                boxes = boxes.view(-1, self.hidden_dim * object_size * object_size) # reshape for classification with linear layer
                total_boxes.append(boxes)

            # classify for whole batch
            total_boxes = torch.cat(total_boxes, dim = 0) # list to tensor
            scores_boxes = self.linear(total_boxes)

        return scores_boxes, scores_boundingbox
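A forward-pass sketch; all hyperparameter values below are assumptions chosen for illustration:

import torch

net = VanillaFastRCNN(input_dim=1, hidden_dim=32, object_size=7,
                      classes=10, n_objects=2, im_size=28)
images = torch.rand(4, 1, 28, 28)
gt_centres = torch.randint(4, 24, (4, 2, 2))  # (x, y) centre per object, away from borders
scores_boxes, scores_bbox = net(images, gt_centres, train_flag=True)
# scores_boxes: [4*2, 10] class scores; scores_bbox: [4, 28, 28] centre heat-map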

Count network parameters

def display_num_param(net):
    nb_param = 0
    for param in net.parameters():
        nb_param += param.numel()
    print('There are {} ({:.2f} million) parameters in this neural network'.format(
        nb_param, nb_param/1e6)
         )
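For example:

import torch.nn as nn
net = nn.Linear(784, 10)
display_num_param(net)  # prints: There are 7850 (0.01 million) parameters in this neural network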

Mask R-CNN architecture

class MaskRCNN(nn.Module):

    def __init__(self, input_dim, hidden_dim, object_size, classes, n_objects, im_size, nb_pixel_classes):
        super(MaskRCNN, self).__init__()

        # metadata
        self.n_objects = n_objects
        self.offset = (object_size - 1)// 2
        self.object_size = object_size
        self.hidden_dim = hidden_dim
        self.im_size = im_size

        # downsampling convnet (semantic-segmentation branch)
        self.ss_conv1 = nn.Conv2d(input_dim, hidden_dim, (3,3), padding=1, stride=2) # input_dim x28x28 --> hidden_dim x14x14
        self.ss_conv2 = nn.Conv2d(hidden_dim, hidden_dim, (3,3), padding=1, stride=2) # hidden_dim x14x14 --> hidden_dim x7x7

        # upsampling convnet
        self.ss_trans_conv1 = nn.ConvTranspose2d(hidden_dim, hidden_dim, (4,4), padding=1, stride=2) # hidden_dim x7x7 --> hidden_dim x14x14
        self.ss_trans_conv2 = nn.ConvTranspose2d(hidden_dim, hidden_dim, (4,4), padding=1, stride=2) # hidden_dim x14x14 --> hidden_dim x28x28

        # per-pixel classification layer
        self.ss_classifier_head = nn.Conv2d(hidden_dim, nb_pixel_classes, (3,3), padding=1, stride=1) # hidden_dim x28x28 --> nb_pixel_classes x28x28

        # backbone convnet
        self.conv1 = nn.Conv2d(input_dim, hidden_dim, kernel_size = 5, stride = 1, padding = 2) # same size
        self.conv2 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size = 5, stride = 1, padding = 2) # same size
        self.conv3 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size = 5, stride = 1, padding = 2) # same size
        self.activation = nn.functional.relu
        
        # per region network, predicting bbox pixel anchor scores
        self.conv_boundingbox = nn.Conv2d(hidden_dim, 1, kernel_size = object_size, stride = 1, padding = self.offset) # activation map padded to detect object size

        # per region network, predicting region class
        self.linear = nn.Linear(in_features = hidden_dim * object_size **2, out_features = classes) # take object-size feature map and classify

    def forward(self, input_tensor, bounding_box_tensor, train_flag = True):

        # apply backbone convnet for feature extraction
        x = input_tensor
        x = self.conv1(x)
        x = self.activation(x)
        x = self.conv2(x)
        x = self.activation(x)
        x = self.conv3(x)
        x = self.activation(x)

        # predict bounding box anchors
        scores_boundingbox = self.conv_boundingbox(x).squeeze()

        # predict classes for each given bounding box
        batches, c, h, w = input_tensor.shape
        if train_flag:
            boxes = []
            for b in range(batches): # for each image in batch
                for k in range(self.n_objects): # for n objects to be predicted
                    offset = self.offset
                    object_size = self.object_size
                    horizontal_left = bounding_box_tensor[b, k, 0].long() - offset
                    vertical_down = bounding_box_tensor[b, k, 1].long() - offset
                    boxes.append(x[b,:,vertical_down:vertical_down + object_size, horizontal_left:horizontal_left + object_size]) # cut out boxes of size object_size^2
            boxes = torch.stack(boxes, dim = 0) # stack to get tensor of (batch_size * n_objects) * hidden_dim * object_size * object_size
            boxes = boxes.view(-1, self.hidden_dim * object_size * object_size) # reshape for classification with linear layer
            scores_boxes = self.linear(boxes)

        else:
            total_boxes = []
            for b in range(batches):

                # get top n_objects box centres by flattening this image's score map, then sorting
                scores_flat = scores_boundingbox[b].view(-1) # to im_size * im_size
                _, idx_largest = torch.sort(scores_flat, descending = True)
                idx_largest = idx_largest[:self.n_objects] # take top n_objects points as centres
                idx_y = idx_largest // self.im_size  # recover (y, x) coordinates from the flat index
                idx_x = idx_largest - idx_y * self.im_size

                # cut out the region around each predicted centre and append it to the list of boxes, as in training
                boxes = []
                for k in range(self.n_objects): # for n objects to be predicted
                    offset = self.offset
                    object_size = self.object_size
                    horizontal_left = torch.clamp(idx_x[k] - offset, 0, self.im_size - object_size) # keep the crop inside the image
                    vertical_down = torch.clamp(idx_y[k] - offset, 0, self.im_size - object_size)
                    boxes.append(x[b,:,vertical_down:vertical_down + object_size, horizontal_left:horizontal_left + object_size]) # cut out boxes of size object_size^2
                boxes = torch.stack(boxes, dim = 0) # stack to get tensor of (batch_size * n_objects) * hidden_dim * object_size * object_size
                boxes = boxes.view(-1, self.hidden_dim * object_size * object_size) # reshape for classification with linear layer
                total_boxes.append(boxes)

            # classify for whole batch
            total_boxes = torch.cat(total_boxes, dim = 0) # list to tensor
            scores_boxes = self.linear(total_boxes)

        # downsampling convnet
        x = input_tensor
        x = self.ss_conv1(x) # [batch_size, hidden_dim, im_size/2, im_size/2] 
        x = torch.relu(x)
        x = self.ss_conv2(x) # [batch_size, hidden_dim, im_size/4, im_size/4] 
        x = torch.relu(x) 
        
        # upsampling convnet
        x = self.ss_trans_conv1(x) # [batch_size, hidden_dim, im_size/2, im_size/2] 
        x = torch.relu(x)
        x = self.ss_trans_conv2(x) # [batch_size, hidden_dim, im_size, im_size] 
        x = torch.relu(x) 

        # classification layer
        scores_pixel_class = self.ss_classifier_head(x) # [batch_size, nb_pixel_classes, im_size, im_size] 

        return scores_boxes, scores_boundingbox, scores_pixel_class
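A forward-pass sketch under the same assumed hyperparameters as the Fast R-CNN example, plus an assumed number of pixel classes:

import torch

net = MaskRCNN(input_dim=1, hidden_dim=32, object_size=7, classes=10,
               n_objects=2, im_size=28, nb_pixel_classes=11)
images = torch.rand(4, 1, 28, 28)
gt_centres = torch.randint(4, 24, (4, 2, 2))
scores_boxes, scores_bbox, scores_pixels = net(images, gt_centres, train_flag=True)
# scores_boxes: [8, 10]; scores_bbox: [4, 28, 28]; scores_pixels: [4, 11, 28, 28]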
