# Gist by @XinDongol, created January 29, 2019
# The CNN takes partitioned input as well as the input with lower resolution, and no communication is made between each VM
'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import random
import numpy as np
from input_activation import Lossy_Linear, Lossy_Quant_Linear, Masked_Linear
from markov_random import markov_rand
import time
global nonzero_pixels_rate
global bytes_per_packet
nonzero_pixels = []
bytes_per_packet = []
num_gpu = 2
#nonzero_pixels_rate = np.zeros((6, num_gpu), dtype=float)
#bytes_per_packet = np.zeros((6, num_gpu), dtype=float)
print_flag = False
def set_print_flag(flag):
    global print_flag
    print_flag = flag


def init_array():
    global nonzero_pixels_rate
    global bytes_per_packet
    nonzero_pixels_rate = np.zeros((6, num_gpu), dtype=float)
    bytes_per_packet = np.zeros((6, num_gpu), dtype=float)


def get_array():
    nonzero = nonzero_pixels_rate.mean(axis=1)
    bytes_per = bytes_per_packet.mean(axis=1)
    return nonzero, bytes_per
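
# Minimal usage sketch (added; the call pattern is an assumption based on how
# test() below uses these helpers): init_array() resets the per-layer, per-GPU
# statistics before a forward pass and get_array() averages them across GPUs.
#
#   init_array()
#   # ... run forward passes that fill nonzero_pixels_rate / bytes_per_packet ...
#   nonzero, bytes_per = get_array()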
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG16_1': [64, 64, 'M'],
    'VGG16_2': [128, 128, 'M'],
    'VGG16_3': [256, 256, 256, 'M'],
    'VGG16_4': [512, 512, 512, 'M'],
    'VGG16_5': [512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
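
# Note (added): each number is a Conv2d output-channel count and 'M' is a 2x2
# MaxPool; the 'VGG16_1' .. 'VGG16_5' entries split the standard VGG16 config
# into five stages that VGG._make_layers builds as separate nn.Sequential blocks.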
class Quant_ReLU(nn.Module):
    def __init__(self, lower_bound=2.0, upper_bound=3.0, num_bits=4.):
        super(Quant_ReLU, self).__init__()
        self.num_bits = num_bits
        self.upper_bound = upper_bound
        self.lower_bound = lower_bound
        # quantization step over the clamped range [lower_bound, upper_bound]
        self.delta = (upper_bound - lower_bound) / (2 ** self.num_bits - 1)

    def forward(self, x):
        #print('x.shape is: ' + str(x.shape))
        # clamp to [lower_bound, upper_bound] and shift so the range starts at 0
        r1 = F.hardtanh(x, self.lower_bound, self.upper_bound) - self.lower_bound
        #print('max is: ' + str(torch.max(x)))
        ################################################################
        # Disabled packet-size statistics: estimated bytes per packet from the
        # number of nonzero boundary pixels plus a run-length style encoding.
        '''
        #percent_nonzero_pixel = float(r1[r1>0].shape[0])/(x.shape[0]*x.shape[1]*(4*x.shape[2]-4))
        #nonzero_pixel = float(r1[r1>0].shape[0])/(x.shape[0])
        byte_per_pkt = float(r1[r1 > 0].shape[0]) / (x.shape[0]) * 4. / 8
        byte_per_pkt = byte_per_pkt / 5.  # change here for 2x2 or 4x4 partition
        #total_length = r1.shape[1] * (2*(r1.shape[2] + r1.shape[3])-4)
        r2 = r1.cpu().detach().numpy()
        gap_length = 1
        s = r1[0, 0:822].tolist()
        counter = 0
        posi_num = 0
        for i in range(len(s)):
            if (s[i] >= 0.001) or (counter == gap_length):
                counter = 0
                posi_num = posi_num + 1
            counter = counter + 1
        nonzero_pixels.append(byte_per_pkt)
        byte_per_pkt = byte_per_pkt + posi_num * np.log2(np.float(gap_length + 1)) / 8
        bytes_per_packet.append(byte_per_pkt)
        '''
        ################################################################
        # quantize the pixels on the margin onto a uniform grid
        r1 = torch.round(r1 / self.delta) * self.delta
        r = r1
        return r
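
# Minimal usage sketch (added, not part of the original gist): Quant_ReLU clamps
# activations to [lower_bound, upper_bound], shifts them to start at zero, and
# rounds them onto a grid with 2 ** num_bits - 1 steps, e.g.
#
#   qrelu = Quant_ReLU(lower_bound=2.0, upper_bound=3.0, num_bits=4)
#   y = qrelu(torch.randn(1, 64, 8, 8) + 2.5)   # values in [0, 1], step = 1/15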
class VGG(nn.Module):
    def __init__(self, vgg_name, dataset, pieces=(2, 2), f12_pieces=(2, 2), loss_prob=0.01, lower_bound=0.5, upper_bound=1.0):
        super(VGG, self).__init__()
        # only VGG16 is supported: the feature stages below are always built
        # from the VGG16_1 .. VGG16_5 configs
        #################################
        self.loss_prob_linear = 0.01
        self.lower_bound_linear = 0.1
        self.upper_bound_linear = 0.65
        self.num_bits_linear = 4
        self.pieces_linear = 5
        self.f12_pieces = (2, 2)  # hard-coded; the f12_pieces argument is not used
        #################################
        self.shrink = self._shrink_input(3)
        # feature pipeline applied to each spatial tile of the full-resolution input
        self.features1 = self._make_layers(cfg['VGG16_1'], 3)
        self.features2 = self._make_layers(cfg['VGG16_2'], 64)
        self.features3 = self._make_layers(cfg['VGG16_3'], 128)
        self.features4 = self._make_layers(cfg['VGG16_4'], 256)
        self.features5 = self._make_layers_layer5(cfg['VGG16_5'], 512)
        # separate feature pipeline applied to the downsampled (low-resolution) input
        self.features1_1 = self._make_layers(cfg['VGG16_1'], 3)
        self.features2_1 = self._make_layers(cfg['VGG16_2'], 64)
        self.features3_1 = self._make_layers(cfg['VGG16_3'], 128)
        self.features4_1 = self._make_layers(cfg['VGG16_4'], 256)
        self.features5_1 = self._make_layers_layer5(cfg['VGG16_5'], 512)
        if dataset == 'Cifar10':
            self.classifier = nn.Linear(512, 10)
        elif dataset == 'Caltech256':
            self.classifier = nn.Sequential(
                Masked_Linear(23040, 4100, pieces=self.pieces_linear, loss_prob=self.loss_prob_linear, bias=True),
                #nn.Linear(23040, 4096),
                nn.BatchNorm1d(4100),
                Quant_ReLU(lower_bound=self.lower_bound_linear, upper_bound=self.upper_bound_linear, num_bits=self.num_bits_linear),
                #nn.ReLU(True),
                nn.Linear(4100, 4100),
                nn.BatchNorm1d(4100),
                nn.ReLU(True),
                nn.Linear(4100, 257)
            )
        elif dataset == 'Caltech101':
            self.classifier = nn.Sequential(
                Masked_Linear(23040, 4100, pieces=self.pieces_linear, loss_prob=self.loss_prob_linear, bias=True),
                #nn.Linear(23040, 4096),
                nn.BatchNorm1d(4100),
                #Quant_ReLU(lower_bound=self.lower_bound_linear, upper_bound=self.upper_bound_linear, num_bits=self.num_bits_linear),
                nn.ReLU(True),
                nn.Linear(4100, 4100),
                nn.BatchNorm1d(4100),
                nn.ReLU(True),
                nn.Linear(4100, 101)
            )
    def forward(self, x):
        # downsample the full input for the low-resolution branch
        x_shrink = self.shrink(x)
        # split x into f12_pieces[0] x f12_pieces[1] spatial tiles (height, then width)
        x_split = []
        xx = torch.chunk(x, self.f12_pieces[0], 2)
        for i in range(self.f12_pieces[0]):
            xxx = torch.chunk(xx[i], self.f12_pieces[1], 3)
            x_split.append(xxx)
        # run each tile through the full feature pipeline independently
        # (no communication between tiles / VMs)
        out = []
        for i in range(self.f12_pieces[0]):
            dummy = []
            for j in range(self.f12_pieces[1]):
                rr = self.features1(x_split[i][j].cuda())
                rr = self.features2(rr.cuda())
                rr = self.features3(rr.cuda())
                rr = self.features4(rr.cuda())
                rr = self.features5(rr.cuda())
                dummy.append(rr)
            # stitch the tiles of this row back together along width
            dummy_cat = torch.cat(dummy[0: self.f12_pieces[1]], 3)
            out.append(dummy_cat)
        # stitch the rows back together along height
        out = torch.cat(out[0: self.f12_pieces[0]], 2)
        out = out.cuda()
        # low-resolution branch
        rr1 = self.features1_1(x_shrink.cuda())
        rr1 = self.features2_1(rr1.cuda())
        rr1 = self.features3_1(rr1.cuda())
        rr1 = self.features4_1(rr1.cuda())
        rr1 = self.features5_1(rr1.cuda())
        rr1 = rr1.view(rr1.size(0), -1)
        out = out.view(out.size(0), -1)
        # concatenate tiled and low-resolution features, then classify
        out = torch.cat((out, rr1), 1)
        out = self.classifier(out)
        print('statistics is: ')
        avg1 = np.average(np.asarray(bytes_per_packet))
        #avg2 = np.average(np.asarray(nonzero_pixels))
        print('bytes per pkt is: ' + str(avg1))
        #print('number of nonzero pixels is ' + str(avg2))
        return out
    def _shrink_input(self, in_channels):
        layers = []
        layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        return nn.Sequential(*layers)

    def _make_layers(self, cfg, in_channels):
        layers = []
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)
    def _make_layers_layer5(self, cfg, in_channels):
        # note: currently identical to _make_layers
        layers = []
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)
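
# Minimal illustrative sketch (added, not part of the original gist; the helper
# name _demo_partition is hypothetical): shows the 2x2 spatial tiling that
# VGG.forward performs with torch.chunk before running each tile through its
# own feature pipeline. Defined but never called.
def _demo_partition(height_pieces=2, width_pieces=2):
    x = torch.randn(1, 3, 224, 224)
    rows = torch.chunk(x, height_pieces, dim=2)                      # split along height
    tiles = [torch.chunk(row, width_pieces, dim=3) for row in rows]  # then along width
    # with the defaults, each tile is 1 x 3 x 112 x 112
    return [[tile.shape for tile in row] for row in tiles]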
def test():
    net = VGG('VGG16', 'Caltech101', lower_bound=2.0, upper_bound=3.0, pieces=(2, 2), f12_pieces=(2, 2))
    net = net.to('cuda')
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True
    x = torch.randn(256, 3, 224, 224)
    # x = torch.randn(128, 3, 32, 32)
    init_array()
    y = net(x)
    # print_array()

# test()
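
# Minimal sketch (added, not part of the original gist): guard the smoke test so
# it only runs when this file is executed directly. Requires a CUDA device and
# the external input_activation / markov_random modules imported above.
if __name__ == '__main__':
    test()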