# Gist by @XinDongol, created January 29, 2019
# The CNN takes partitioned input as well as the input with lower resolution, and no communication is made between each VM
'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import random
import numpy as np
from input_activation import Lossy_Linear, Lossy_Quant_Linear, Masked_Linear
from markov_random import markov_rand
import time
global nonzero_pixels_rate
global bytes_per_packet
nonzero_pixels = []
bytes_per_packet = []
num_gpu = 2
#nonzero_pixels_rate = np.zeros((6, num_gpu), dtype=float)
#bytes_per_packet = np.zeros((6, num_gpu), dtype=float)
print_flag = False
def set_print_flag(flag):
    global print_flag
    print_flag = flag


def init_array():
    global nonzero_pixels_rate
    global bytes_per_packet
    nonzero_pixels_rate = np.zeros((6, num_gpu), dtype=float)
    bytes_per_packet = np.zeros((6, num_gpu), dtype=float)


def get_array():
    nonzero = nonzero_pixels_rate.mean(axis=1)
    bytes_per = bytes_per_packet.mean(axis=1)
    return nonzero, bytes_per
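
# Minimal usage sketch (added; the call pattern is an assumption based on how
# test() below uses these helpers): init_array() resets the per-layer, per-GPU
# statistics before a forward pass and get_array() averages them across GPUs.
#
#   init_array()
#   # ... run forward passes that fill nonzero_pixels_rate / bytes_per_packet ...
#   nonzero, bytes_per = get_array()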
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG16_1': [64, 64, 'M'],
    'VGG16_2': [128, 128, 'M'],
    'VGG16_3': [256, 256, 256, 'M'],
    'VGG16_4': [512, 512, 512, 'M'],
    'VGG16_5': [512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
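
# Note (added): each number is a Conv2d output-channel count and 'M' is a 2x2
# MaxPool; the 'VGG16_1' .. 'VGG16_5' entries split the standard VGG16 config
# into five stages that VGG._make_layers builds as separate nn.Sequential blocks.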
class Quant_ReLU(nn.Module):
    def __init__(self, lower_bound=2.0, upper_bound=3.0, num_bits=4.):
        super(Quant_ReLU, self).__init__()
        self.num_bits = num_bits
        self.upper_bound = upper_bound
        self.lower_bound = lower_bound
        # quantization step over the clamped range [lower_bound, upper_bound]
        self.delta = (upper_bound - lower_bound) / (2 ** self.num_bits - 1)

    def forward(self, x):
        #print('x.shape is: ' + str(x.shape))
        # clamp to [lower_bound, upper_bound] and shift so the range starts at 0
        r1 = F.hardtanh(x, self.lower_bound, self.upper_bound) - self.lower_bound
        #print('max is: ' + str(torch.max(x)))
        ################################################################
        # Disabled packet-size statistics: estimated bytes per packet from the
        # number of nonzero boundary pixels plus a run-length style encoding.
        '''
        #percent_nonzero_pixel = float(r1[r1>0].shape[0])/(x.shape[0]*x.shape[1]*(4*x.shape[2]-4))
        #nonzero_pixel = float(r1[r1>0].shape[0])/(x.shape[0])
        byte_per_pkt = float(r1[r1 > 0].shape[0]) / (x.shape[0]) * 4. / 8
        byte_per_pkt = byte_per_pkt / 5.  # change here for 2x2 or 4x4 partition
        #total_length = r1.shape[1] * (2*(r1.shape[2] + r1.shape[3])-4)
        r2 = r1.cpu().detach().numpy()
        gap_length = 1
        s = r1[0, 0:822].tolist()
        counter = 0
        posi_num = 0
        for i in range(len(s)):
            if (s[i] >= 0.001) or (counter == gap_length):
                counter = 0
                posi_num = posi_num + 1
            counter = counter + 1
        nonzero_pixels.append(byte_per_pkt)
        byte_per_pkt = byte_per_pkt + posi_num * np.log2(np.float(gap_length + 1)) / 8
        bytes_per_packet.append(byte_per_pkt)
        '''
        ################################################################
        # quantize the pixels on the margin onto a uniform grid
        r1 = torch.round(r1 / self.delta) * self.delta
        r = r1
        return r
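
# Minimal usage sketch (added, not part of the original gist): Quant_ReLU clamps
# activations to [lower_bound, upper_bound], shifts them to start at zero, and
# rounds them onto a grid with 2 ** num_bits - 1 steps, e.g.
#
#   qrelu = Quant_ReLU(lower_bound=2.0, upper_bound=3.0, num_bits=4)
#   y = qrelu(torch.randn(1, 64, 8, 8) + 2.5)   # values in [0, 1], step = 1/15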
class VGG(nn.Module):
    def __init__(self, vgg_name, dataset, pieces=(2, 2), f12_pieces=(2, 2), loss_prob=0.01, lower_bound=0.5, upper_bound=1.0):
        super(VGG, self).__init__()
        # only VGG16 is supported: the feature stages below are always built
        # from the VGG16_1 .. VGG16_5 configs
        #################################
        self.loss_prob_linear = 0.01
        self.lower_bound_linear = 0.1
        self.upper_bound_linear = 0.65
        self.num_bits_linear = 4
        self.pieces_linear = 5
        self.f12_pieces = (2, 2)  # hard-coded; the f12_pieces argument is not used
        #################################
        self.shrink = self._shrink_input(3)
        # feature pipeline applied to each spatial tile of the full-resolution input
        self.features1 = self._make_layers(cfg['VGG16_1'], 3)
        self.features2 = self._make_layers(cfg['VGG16_2'], 64)
        self.features3 = self._make_layers(cfg['VGG16_3'], 128)
        self.features4 = self._make_layers(cfg['VGG16_4'], 256)
        self.features5 = self._make_layers_layer5(cfg['VGG16_5'], 512)
        # separate feature pipeline applied to the downsampled (low-resolution) input
        self.features1_1 = self._make_layers(cfg['VGG16_1'], 3)
        self.features2_1 = self._make_layers(cfg['VGG16_2'], 64)
        self.features3_1 = self._make_layers(cfg['VGG16_3'], 128)
        self.features4_1 = self._make_layers(cfg['VGG16_4'], 256)
        self.features5_1 = self._make_layers_layer5(cfg['VGG16_5'], 512)
        if dataset == 'Cifar10':
            self.classifier = nn.Linear(512, 10)
        elif dataset == 'Caltech256':
            self.classifier = nn.Sequential(
                Masked_Linear(23040, 4100, pieces=self.pieces_linear, loss_prob=self.loss_prob_linear, bias=True),
                #nn.Linear(23040, 4096),
                nn.BatchNorm1d(4100),
                Quant_ReLU(lower_bound=self.lower_bound_linear, upper_bound=self.upper_bound_linear, num_bits=self.num_bits_linear),
                #nn.ReLU(True),
                nn.Linear(4100, 4100),
                nn.BatchNorm1d(4100),
                nn.ReLU(True),
                nn.Linear(4100, 257)
            )
        elif dataset == 'Caltech101':
            self.classifier = nn.Sequential(
                Masked_Linear(23040, 4100, pieces=self.pieces_linear, loss_prob=self.loss_prob_linear, bias=True),
                #nn.Linear(23040, 4096),
                nn.BatchNorm1d(4100),
                #Quant_ReLU(lower_bound=self.lower_bound_linear, upper_bound=self.upper_bound_linear, num_bits=self.num_bits_linear),
                nn.ReLU(True),
                nn.Linear(4100, 4100),
                nn.BatchNorm1d(4100),
                nn.ReLU(True),
                nn.Linear(4100, 101)
            )
    def forward(self, x):
        # downsample the full input for the low-resolution branch
        x_shrink = self.shrink(x)
        # split x into f12_pieces[0] x f12_pieces[1] spatial tiles (height, then width)
        x_split = []
        xx = torch.chunk(x, self.f12_pieces[0], 2)
        for i in range(self.f12_pieces[0]):
            xxx = torch.chunk(xx[i], self.f12_pieces[1], 3)
            x_split.append(xxx)
        # run each tile through the full feature pipeline independently
        # (no communication between tiles / VMs)
        out = []
        for i in range(self.f12_pieces[0]):
            dummy = []
            for j in range(self.f12_pieces[1]):
                rr = self.features1(x_split[i][j].cuda())
                rr = self.features2(rr.cuda())
                rr = self.features3(rr.cuda())
                rr = self.features4(rr.cuda())
                rr = self.features5(rr.cuda())
                dummy.append(rr)
            # stitch the tiles of this row back together along width
            dummy_cat = torch.cat(dummy[0: self.f12_pieces[1]], 3)
            out.append(dummy_cat)
        # stitch the rows back together along height
        out = torch.cat(out[0: self.f12_pieces[0]], 2)
        out = out.cuda()
        # low-resolution branch
        rr1 = self.features1_1(x_shrink.cuda())
        rr1 = self.features2_1(rr1.cuda())
        rr1 = self.features3_1(rr1.cuda())
        rr1 = self.features4_1(rr1.cuda())
        rr1 = self.features5_1(rr1.cuda())
        rr1 = rr1.view(rr1.size(0), -1)
        out = out.view(out.size(0), -1)
        # concatenate tiled and low-resolution features, then classify
        out = torch.cat((out, rr1), 1)
        out = self.classifier(out)
        print('statistics is: ')
        avg1 = np.average(np.asarray(bytes_per_packet))
        #avg2 = np.average(np.asarray(nonzero_pixels))
        print('bytes per pkt is: ' + str(avg1))
        #print('number of nonzero pixels is ' + str(avg2))
        return out
    def _shrink_input(self, in_channels):
        layers = []
        layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        return nn.Sequential(*layers)

    def _make_layers(self, cfg, in_channels):
        layers = []
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)
    def _make_layers_layer5(self, cfg, in_channels):
        # note: currently identical to _make_layers
        layers = []
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)
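
# Minimal illustrative sketch (added, not part of the original gist; the helper
# name _demo_partition is hypothetical): shows the 2x2 spatial tiling that
# VGG.forward performs with torch.chunk before running each tile through its
# own feature pipeline. Defined but never called.
def _demo_partition(height_pieces=2, width_pieces=2):
    x = torch.randn(1, 3, 224, 224)
    rows = torch.chunk(x, height_pieces, dim=2)                      # split along height
    tiles = [torch.chunk(row, width_pieces, dim=3) for row in rows]  # then along width
    # with the defaults, each tile is 1 x 3 x 112 x 112
    return [[tile.shape for tile in row] for row in tiles]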
def test():
    net = VGG('VGG16', 'Caltech101', lower_bound=2.0, upper_bound=3.0, pieces=(2, 2), f12_pieces=(2, 2))
    net = net.to('cuda')
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True
    x = torch.randn(256, 3, 224, 224)
    # x = torch.randn(128, 3, 32, 32)
    init_array()
    y = net(x)
    # print_array()

# test()
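
# Minimal sketch (added, not part of the original gist): guard the smoke test so
# it only runs when this file is executed directly. Requires a CUDA device and
# the external input_activation / markov_random modules imported above.
if __name__ == '__main__':
    test()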