CIFAR-10 model testing
# activations.py
import torch
import torch.nn as nn


class Swish(nn.Module):
    """Swish activation: x * sigmoid(x)."""
    def __init__(self):
        super().__init__()

    def forward(self, x):
        p = torch.sigmoid(x)  # F.sigmoid is deprecated; torch.sigmoid is equivalent
        p = p.mul(x)
        return p


class Swish_beta(nn.Module):
    """Swish with a learnable slope: x * sigmoid(beta * x)."""
    def __init__(self):
        super().__init__()
        self.beta = nn.Parameter(torch.Tensor([1]), requires_grad=True)

    def forward(self, x):
        p = torch.sigmoid(self.beta * x)
        p = p.mul(x)
        return p

# No custom to() override is needed here: nn.Module.to already moves every
# registered parameter (including self.beta), and an override of the form
# `return self.to(device)` would recurse forever.
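

# Quick sanity check (a sketch added for illustration, not part of the
# training pipeline): both modules should preserve shape and, with beta at
# its initial value of 1, agree with x * sigmoid(x).
if __name__ == '__main__':
    _x = torch.randn(4, 8)
    assert torch.allclose(Swish()(_x), _x * torch.sigmoid(_x))
    assert torch.allclose(Swish_beta()(_x), _x * torch.sigmoid(_x))
    print('activations OK:', Swish()(_x).shape)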


# cifar_models.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from time import time
import os

import activations as act
import F_activations as F_act


class DenseNet(nn.Module):
    """Fully connected baseline on the flattened 32x32x3 image
    (despite the name, a plain MLP rather than a DenseNet)."""

    def __init__(self, NUM_CLASSES=10):
        super().__init__()
        self.size1 = 64
        self.size2 = 64
        self.NUM_CLASSES = NUM_CLASSES
        self.fc1 = nn.Linear(3*32**2, self.size1)
        self.bn1 = nn.BatchNorm1d(self.size1)
        self.fc2 = nn.Linear(self.size1, self.size2)
        self.bn2 = nn.BatchNorm1d(self.size2)
        self.fc3 = nn.Linear(self.size2, self.NUM_CLASSES)
        self._initialize_weights()

    def _initialize_weights(self):
        gain = nn.init.calculate_gain(nonlinearity='leaky_relu')
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight, gain)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = x.view(-1, 3*32**2)
        x = self.bn1(F.relu(self.fc1(x)))
        x = self.bn2(F.relu(self.fc2(x)))
        if self.training:
            x = self.fc3(x)  # raw logits for CrossEntropyLoss
        else:
            x = F.softmax(self.fc3(x), dim=1)
        return x

    def save_state(self, optimizer, epoch, acc, time=0, tag=None):
        folder = './models_training/cifar_net/DenseNet_{:d}/'.format(int(time))
        # makedirs (rather than mkdir) so missing parent directories are created too
        os.makedirs(folder, exist_ok=True)
        state = {
            'epoch': epoch,
            'state_dict': self.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if tag is not None:
            filepath = 'model_DenseNet_ep{:d}_acc{:.3f}_{:s}'.format(
                epoch, float(acc), str(tag)
            )
        else:
            filepath = 'model_DenseNet_ep{:d}_acc{:.3f}'.format(
                epoch, float(acc)
            )
        torch.save(state, folder + filepath)

    def save_model(self, acc, time=0):
        folder = './models_trained/cifar_net/DenseNet_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        filepath = 'model_DenseNet_acc{:.3f}'.format(float(acc))
        torch.save(self.state_dict(), folder + filepath)


class CNN(nn.Module):
    """Two maxout conv layers followed by a fully connected head."""

    def __init__(self, NUM_CLASSES=10):
        super().__init__()
        self.size1 = 64
        self.size2 = 128
        self.NUM_CLASSES = NUM_CLASSES
        # each conv emits 8x the target channels; maxout folds them back down
        self.conv1 = nn.Conv2d(3, self.size1*8, 5, stride=2)
        self.bn1 = nn.BatchNorm2d(self.size1)
        self.conv2 = nn.Conv2d(self.size1, self.size2*8, 5, stride=2)
        self.bn2 = nn.BatchNorm2d(self.size2)
        self.fc1 = nn.Linear(self.size2 * 5 * 5, self.size1)
        self.bn3 = nn.BatchNorm1d(self.size1)
        self.fc2 = nn.Linear(self.size1, self.NUM_CLASSES)
        self._initialize_weights()

    def _initialize_weights(self):
        gain = nn.init.calculate_gain(nonlinearity='leaky_relu')
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight, gain)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.bn1(F_act.maxout(self.conv1(x), 8))
        x = self.bn2(F_act.maxout(self.conv2(x), 8))
        x = x.view(-1, self.size2 * 5 * 5)
        x = self.bn3(F.relu(self.fc1(x)))
        if self.training:
            x = self.fc2(x)
        else:
            x = F.softmax(self.fc2(x), dim=1)
        return x

    def save_state(self, optimizer, epoch, acc, time=0, tag=None):
        folder = './models_training/cifar_net/CNN_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        state = {
            'epoch': epoch,
            'state_dict': self.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if tag is not None:
            filepath = 'model_CNN_ep{:d}_acc{:.3f}_{:s}'.format(
                epoch, float(acc), str(tag)
            )
        else:
            filepath = 'model_CNN_ep{:d}_acc{:.3f}'.format(
                epoch, float(acc)
            )
        torch.save(state, folder + filepath)

    def save_model(self, acc, time=0):
        folder = './models_trained/cifar_net/CNN_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        filepath = 'model_CNN_acc{:.3f}'.format(float(acc))
        torch.save(self.state_dict(), folder + filepath)


class FCNN(nn.Module):
    """Fully convolutional net; a 1x1 conv produces the class scores."""

    def __init__(self, NUM_CLASSES=10):
        super().__init__()
        self.size1 = 64
        self.size2 = 96
        self.size3 = 128
        self.NUM_CLASSES = NUM_CLASSES
        self.conv1 = nn.Conv2d(3, self.size1, 5, stride=2)
        self.bn1 = nn.BatchNorm2d(self.size1)
        self.conv2 = nn.Conv2d(self.size1, self.size2, 5, stride=2)
        self.bn2 = nn.BatchNorm2d(self.size2)
        self.conv3 = nn.Conv2d(self.size2, self.size3, 5, stride=2)
        self.bn3 = nn.BatchNorm2d(self.size3)
        self.conv4 = nn.Conv2d(self.size3, self.NUM_CLASSES, 1)
        self._initialize_weights()

    def _initialize_weights(self):
        gain = nn.init.calculate_gain(nonlinearity='leaky_relu')
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight, gain)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))   # 32 -> 14 -> 5 -> 1 spatially
        x = self.conv4(x)           # 1x1 conv acts as the classifier
        x = x.view(-1, self.NUM_CLASSES)
        if not self.training:
            x = F.softmax(x, dim=1)
        return x

    def save_state(self, optimizer, epoch, acc, time=0, tag=None):
        folder = './models_training/cifar_net/FCNN_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        state = {
            'epoch': epoch,
            'state_dict': self.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if tag is not None:
            filepath = 'model_FCNN_ep{:d}_acc{:.3f}_{:s}'.format(
                epoch, float(acc), str(tag)
            )
        else:
            filepath = 'model_FCNN_ep{:d}_acc{:.3f}'.format(
                epoch, float(acc)
            )
        torch.save(state, folder + filepath)

    def save_model(self, acc, time=0):
        folder = './models_trained/cifar_net/FCNN_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        filepath = 'model_FCNN_acc{:.3f}'.format(float(acc))
        torch.save(self.state_dict(), folder + filepath)


class Inception(nn.Module):
    def __init__(self, input_depth, output_depth):
        super().__init__()
        self.b1 = nn.Sequential(
            nn.Conv2d(input_depth, output_depth//4, 1),
            nn.BatchNorm2d(output_depth//4),
            nn.ReLU(),
        )
        self.b2 = nn.Sequential(
            nn.Conv2d(input_depth, output_depth//4, 1),
            nn.BatchNorm2d(output_depth//4),
            nn.ReLU(),
            nn.ReplicationPad2d(1),
            nn.Conv2d(output_depth//4, output_depth//4, 3),
            nn.BatchNorm2d(output_depth//4),
            nn.ReLU(),
        )
        self.b3 = nn.Sequential(
            nn.Conv2d(input_depth, output_depth//4, 1),
            nn.BatchNorm2d(output_depth//4),
            nn.ReLU(),
            nn.ReplicationPad2d(2),
            nn.Conv2d(output_depth//4, output_depth//4, 5),
            nn.BatchNorm2d(output_depth//4),
            nn.ReLU(),
        )
        self.b4 = nn.Sequential(
            nn.ReplicationPad2d(1),
            nn.MaxPool2d(3, 1),
            nn.Conv2d(input_depth, output_depth//4, 1),
            nn.BatchNorm2d(output_depth//4),
            nn.ReLU(),
        )

    def forward(self, x):
        x1 = self.b1(x)
        x2 = self.b2(x)
        x3 = self.b3(x)
        x4 = self.b4(x)
        return torch.cat([x1, x2, x3, x4], dim=1)
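
# Shape sketch (illustrative, assuming output_depth is divisible by 4): every
# branch preserves the spatial size, so the four output_depth//4 chunks
# concatenate back to exactly output_depth channels, e.g.
#   blk = Inception(64, 128)
#   blk(torch.randn(2, 64, 32, 32)).shape  # -> torch.Size([2, 128, 32, 32])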


class GoogLeNet(nn.Module):
    """Small GoogLeNet-style net: two stem convs and three Inception blocks."""

    def __init__(self, inc1_depth=(64,64),
                 inc2_depth=(64,64),
                 inc3_depth=(64,64),
                 NUM_CLASSES=10):
        super().__init__()
        self.inc1_depth = inc1_depth
        self.inc2_depth = inc2_depth
        self.inc3_depth = inc3_depth
        self.pad3x3 = nn.ReplicationPad2d(1)
        self.conv_in1 = nn.Conv2d(3, self.inc1_depth[0], 3)
        # bn_conv1 and bn_conv2 are each applied after two different convs in
        # forward(), so those pairs share batch-norm running statistics
        self.bn_conv1 = nn.BatchNorm2d(self.inc1_depth[0])
        self.conv_in2 = nn.Conv2d(self.inc1_depth[0], self.inc1_depth[0], 3)
        self.inc1 = Inception(self.inc1_depth[0], self.inc1_depth[1])
        self.inc2 = Inception(self.inc2_depth[0], self.inc2_depth[1])
        self.inc3 = Inception(self.inc3_depth[0], self.inc3_depth[1])
        self.conv_out1 = nn.Conv2d(self.inc3_depth[1], self.inc3_depth[1], 5, stride=2)
        self.conv_out2 = nn.Conv2d(self.inc3_depth[1], self.inc3_depth[1], 5, stride=2)
        self.bn_conv2 = nn.BatchNorm2d(self.inc3_depth[1])
        self.fc1 = nn.Linear(self.inc3_depth[1] * 5 * 5, self.inc3_depth[1])
        self.bn_fc1 = nn.BatchNorm1d(self.inc3_depth[1])
        self.fc2 = nn.Linear(self.inc3_depth[1], NUM_CLASSES)
        self._initialize_weights()

    def _initialize_weights(self):
        gain = nn.init.calculate_gain(nonlinearity='relu')
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight, gain)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.pad3x3(x)
        x = self.bn_conv1(F.relu(self.conv_in1(x)))
        x = self.pad3x3(x)
        x = self.bn_conv1(F.relu(self.conv_in2(x)))
        x = self.inc1(x)
        x = self.inc2(x)
        x = self.inc3(x)
        x = self.bn_conv2(F.relu(self.conv_out1(x)))
        x = self.bn_conv2(F.relu(self.conv_out2(x)))
        x = x.view(-1, self.inc3_depth[1] * 5 * 5)
        x = self.bn_fc1(F.relu(self.fc1(x)))
        if self.training:
            x = self.fc2(x)
        else:
            x = F.softmax(self.fc2(x), dim=1)
        return x

    def save_state(self, optimizer, epoch, acc, time=0, tag=None):
        folder = './models_training/cifar_net/GoogLeNet_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        state = {
            'epoch': epoch,
            'state_dict': self.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if tag is not None:
            filepath = 'model_GoogLeNet_ep{:d}_acc{:.3f}_{:s}'.format(
                epoch, float(acc), str(tag)
            )
        else:
            filepath = 'model_GoogLeNet_ep{:d}_acc{:.3f}'.format(
                epoch, float(acc)
            )
        torch.save(state, folder + filepath)

    def save_model(self, acc, time=0):
        folder = './models_trained/cifar_net/GoogLeNet_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        filepath = 'model_GoogLeNet_acc{:.3f}'.format(float(acc))
        torch.save(self.state_dict(), folder + filepath)


class GoogLeNet2(nn.Module):
    """Slimmer GoogLeNet variant: one stem conv and two Inception blocks."""

    def __init__(self, inc1_depth=(64,128),
                 inc2_depth=(128,128),
                 NUM_CLASSES=10):
        super().__init__()
        self.inc1_depth = inc1_depth
        self.inc2_depth = inc2_depth
        self.pad3x3 = nn.ReplicationPad2d(1)
        self.conv_in1 = nn.Conv2d(3, self.inc1_depth[0], 3)
        self.bn_conv1 = nn.BatchNorm2d(self.inc1_depth[0])
        # self.conv_in2 = nn.Conv2d(self.inc1_depth[0], self.inc1_depth[0], 3)
        self.inc1 = Inception(self.inc1_depth[0], self.inc1_depth[1])
        self.inc2 = Inception(self.inc2_depth[0], self.inc2_depth[1])
        # self.conv_out1 = nn.Conv2d(self.inc2_depth[1], self.inc2_depth[1], 5, stride=2)
        self.conv_out1 = nn.Conv2d(self.inc2_depth[1], self.inc2_depth[1], 5, stride=4)
        # self.conv_out2 = nn.Conv2d(self.inc2_depth[1], self.inc2_depth[1], 5, stride=2)
        self.bn_conv2 = nn.BatchNorm2d(self.inc2_depth[1])
        # self.fc1 = nn.Linear(self.inc2_depth[1] * 5 * 5, 256)
        self.fc1 = nn.Linear(self.inc2_depth[1] * 7 * 7, 128)
        self.bn_fc1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, NUM_CLASSES)
        self._initialize_weights()

    def _initialize_weights(self):
        gain = nn.init.calculate_gain(nonlinearity='relu')
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight, gain)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.pad3x3(x)
        x = self.bn_conv1(F.relu(self.conv_in1(x)))
        # x = self.pad3x3(x)
        # x = self.bn_conv1(F.relu(self.conv_in2(x)))
        x = self.inc1(x)
        x = self.inc2(x)
        x = self.bn_conv2(F.relu(self.conv_out1(x)))
        # x = self.bn_conv2(F.relu(self.conv_out2(x)))
        # x = x.view(-1, self.inc2_depth[1] * 5 * 5)
        x = x.view(-1, self.inc2_depth[1] * 7 * 7)
        x = self.bn_fc1(F.relu(self.fc1(x)))
        if self.training:
            x = self.fc2(x)
        else:
            x = F.softmax(self.fc2(x), dim=1)
        return x

    def save_state(self, optimizer, epoch, acc, time=0, tag=None):
        folder = './models_training/cifar_net/GoogLeNet2_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        state = {
            'epoch': epoch,
            'state_dict': self.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if tag is not None:
            filepath = 'model_GoogLeNet2_ep{:d}_acc{:.3f}_{:s}'.format(
                epoch, float(acc), str(tag)
            )
        else:
            filepath = 'model_GoogLeNet2_ep{:d}_acc{:.3f}'.format(
                epoch, float(acc)
            )
        torch.save(state, folder + filepath)

    def save_model(self, acc, time=0):
        folder = './models_trained/cifar_net/GoogLeNet2_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        filepath = 'model_GoogLeNet2_acc{:.3f}'.format(float(acc))
        torch.save(self.state_dict(), folder + filepath)


class DeepCNN(nn.Module):
    """Four conv layers (leaky ReLU) followed by a fully connected head."""

    def __init__(self, NUM_CLASSES=10):
        super().__init__()
        self.size1 = 64
        self.size2 = 128
        self.size3 = 256
        self.NUM_CLASSES = NUM_CLASSES
        self.pad5x5 = nn.ReplicationPad2d(2)
        self.conv1 = nn.Conv2d(3, self.size1, 5, stride=1)
        self.bn1 = nn.BatchNorm2d(self.size1)
        self.conv2 = nn.Conv2d(self.size1, self.size2, 5, stride=1)
        self.bn2 = nn.BatchNorm2d(self.size2)
        self.conv3 = nn.Conv2d(self.size2, self.size3, 5, stride=2)
        self.bn3 = nn.BatchNorm2d(self.size3)
        self.conv4 = nn.Conv2d(self.size3, self.size3, 5, stride=2)
        self.bn4 = nn.BatchNorm2d(self.size3)
        self.fc1 = nn.Linear(self.size3 * 5 * 5, self.size2)
        self.bn_fc1 = nn.BatchNorm1d(self.size2)
        self.fc2 = nn.Linear(self.size2, self.NUM_CLASSES)
        self._initialize_weights()

    def _initialize_weights(self):
        gain = nn.init.calculate_gain(nonlinearity='leaky_relu')
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight, gain)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.pad5x5(x)
        x = self.bn1(F.leaky_relu(self.conv1(x)))
        x = self.pad5x5(x)
        x = self.bn2(F.leaky_relu(self.conv2(x)))
        x = self.bn3(F.leaky_relu(self.conv3(x)))
        x = self.bn4(F.leaky_relu(self.conv4(x)))
        x = x.view(-1, self.size3 * 5 * 5)
        x = self.bn_fc1(F.leaky_relu(self.fc1(x)))
        if self.training:
            x = self.fc2(x)
        else:
            x = F.softmax(self.fc2(x), dim=1)
        return x

    def save_state(self, optimizer, epoch, acc, time=0, tag=None):
        folder = './models_training/cifar_net/DeepCNN_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        state = {
            'epoch': epoch,
            'state_dict': self.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if tag is not None:
            filepath = 'model_DeepCNN_ep{:d}_acc{:.3f}_{:s}'.format(
                epoch, float(acc), str(tag)
            )
        else:
            filepath = 'model_DeepCNN_ep{:d}_acc{:.3f}'.format(
                epoch, float(acc)
            )
        torch.save(state, folder + filepath)

    def save_model(self, acc, time=0):
        folder = './models_trained/cifar_net/DeepCNN_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        filepath = 'model_DeepCNN_acc{:.3f}'.format(float(acc))
        torch.save(self.state_dict(), folder + filepath)


class NiN(nn.Module):
    """Network-in-Network block: one kxk conv followed by two 1x1 convs."""

    def __init__(self, filters=(3,192,160,96), kernels=(5,1,1),
                 pooling_type='max', pooling_stride=2,
                 pooling_kernel=3, dropout=0.5):
        super().__init__()
        # note the integer division: nn.Conv2d requires an int padding
        self.conv1 = nn.Conv2d(filters[0], filters[1], kernel_size=kernels[0],
                               stride=1, padding=(kernels[0]-1)//2)
        self.conv2 = nn.Conv2d(filters[1], filters[2], kernel_size=kernels[1],
                               stride=1, padding=(kernels[1]-1)//2)
        self.conv3 = nn.Conv2d(filters[2], filters[3], kernel_size=kernels[2],
                               stride=1, padding=(kernels[2]-1)//2)
        if pooling_type == 'max':
            self.pool = nn.MaxPool2d(kernel_size=pooling_kernel,
                                     stride=pooling_stride)
        elif pooling_type == 'avg':
            self.pool = nn.AvgPool2d(kernel_size=pooling_kernel,
                                     stride=pooling_stride)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        x = self.dropout(x)
        return x


class NiN_wBN(nn.Module):
    """NiN block with batch norm after each activation."""

    def __init__(self, filters=(3,192,160,96), kernels=(5,1,1),
                 pooling_type='max', pooling_stride=2,
                 pooling_kernel=3, dropout=0.5):
        super().__init__()
        self.conv1 = nn.Conv2d(filters[0], filters[1], kernel_size=kernels[0],
                               stride=1, padding=(kernels[0]-1)//2)
        self.bn1 = nn.BatchNorm2d(filters[1])
        self.conv2 = nn.Conv2d(filters[1], filters[2], kernel_size=kernels[1],
                               stride=1, padding=(kernels[1]-1)//2)
        self.bn2 = nn.BatchNorm2d(filters[2])
        self.conv3 = nn.Conv2d(filters[2], filters[3], kernel_size=kernels[2],
                               stride=1, padding=(kernels[2]-1)//2)
        self.bn3 = nn.BatchNorm2d(filters[3])
        if pooling_type == 'max':
            self.pool = nn.MaxPool2d(kernel_size=pooling_kernel,
                                     stride=pooling_stride)
        elif pooling_type == 'avg':
            self.pool = nn.AvgPool2d(kernel_size=pooling_kernel,
                                     stride=pooling_stride)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.bn1(F.relu(self.conv1(x)))
        x = self.bn2(F.relu(self.conv2(x)))
        x = self.bn3(F.relu(self.conv3(x)))
        x = self.pool(x)
        x = self.dropout(x)
        return x


class MaxoutNiN(nn.Module):
    """NiN block with maxout activations: each conv emits maxout_units times
    the target channels, which maxout folds back down."""

    def __init__(self, filters=(3,192,160,96), kernels=(5,1,1), maxout_units=3,
                 pooling_type='max', pooling_stride=2,
                 pooling_kernel=3, dropout=0.5):
        super().__init__()
        self.mo_units = maxout_units
        self.conv1 = nn.Conv2d(filters[0], filters[1]*self.mo_units,
                               kernel_size=kernels[0], stride=1,
                               padding=(kernels[0]-1)//2)
        self.bn1 = nn.BatchNorm2d(filters[1])
        self.conv2 = nn.Conv2d(filters[1], filters[2]*self.mo_units,
                               kernel_size=kernels[1], stride=1,
                               padding=(kernels[1]-1)//2)
        self.bn2 = nn.BatchNorm2d(filters[2])
        self.conv3 = nn.Conv2d(filters[2], filters[3]*self.mo_units,
                               kernel_size=kernels[2], stride=1,
                               padding=(kernels[2]-1)//2)
        self.bn3 = nn.BatchNorm2d(filters[3])
        if pooling_type == 'max':
            self.pool = nn.MaxPool2d(kernel_size=pooling_kernel,
                                     stride=pooling_stride)
        elif pooling_type == 'avg':
            self.pool = nn.AvgPool2d(kernel_size=pooling_kernel,
                                     stride=pooling_stride)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.bn1(F_act.maxout(self.conv1(x), self.mo_units))
        x = self.bn2(F_act.maxout(self.conv2(x), self.mo_units))
        x = self.bn3(F_act.maxout(self.conv3(x), self.mo_units))
        x = self.pool(x)
        x = self.dropout(x)
        return x


class NiNnet(nn.Module):
    """Three stacked NiN blocks; the last one emits NUM_CLASSES maps."""

    def __init__(self, NUM_CLASSES=10):
        super().__init__()
        self.NUM_CLASSES = NUM_CLASSES
        self.NiN1 = NiN(filters=(3,192,160,96), kernels=(5,1,1),
                        pooling_type='max', pooling_stride=2,
                        pooling_kernel=3, dropout=0.5)
        self.NiN2 = NiN(filters=(96,192,192,192), kernels=(5,1,1),
                        pooling_type='avg', pooling_stride=2,
                        pooling_kernel=3, dropout=0.5)
        self.NiN3 = NiN(filters=(192,192,192,self.NUM_CLASSES), kernels=(3,1,1),
                        pooling_type='avg', pooling_stride=2,
                        pooling_kernel=7, dropout=0)
        self._initialize_weights()

    def _initialize_weights(self):
        gain = nn.init.calculate_gain(nonlinearity='relu')
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight, gain)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.NiN1(x)
        x = self.NiN2(x)
        x = self.NiN3(x)   # global pooling leaves a 1x1 map per class
        x = x.view(-1, self.NUM_CLASSES)
        if not self.training:
            x = F.softmax(x, dim=1)
        return x

    def save_state(self, optimizer, epoch, acc, time=0, tag=None):
        folder = './models_training/cifar_net/NiNnet_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        state = {
            'epoch': epoch,
            'state_dict': self.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if tag is not None:
            filepath = 'model_NiNnet_ep{:d}_acc{:.3f}_{:s}'.format(
                epoch, float(acc), str(tag)
            )
        else:
            filepath = 'model_NiNnet_ep{:d}_acc{:.3f}'.format(
                epoch, float(acc)
            )
        torch.save(state, folder + filepath)

    def save_model(self, acc, time=0):
        folder = './models_trained/cifar_net/NiNnet_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        filepath = 'model_NiNnet_acc{:.3f}'.format(float(acc))
        torch.save(self.state_dict(), folder + filepath)


class NiNnet_wBN(nn.Module):
    """NiNnet variant built from batch-normalized NiN blocks."""

    def __init__(self, NUM_CLASSES=10):
        super().__init__()
        self.NUM_CLASSES = NUM_CLASSES
        self.NiN1 = NiN_wBN(filters=(3,192,160,96), kernels=(5,1,1),
                            pooling_type='max', pooling_stride=2,
                            pooling_kernel=3, dropout=0.5)
        self.NiN2 = NiN_wBN(filters=(96,192,192,192), kernels=(5,1,1),
                            pooling_type='avg', pooling_stride=2,
                            pooling_kernel=3, dropout=0.5)
        self.NiN3 = NiN_wBN(filters=(192,192,192,self.NUM_CLASSES), kernels=(3,1,1),
                            pooling_type='avg', pooling_stride=2,
                            pooling_kernel=7, dropout=0)
        self._initialize_weights()

    def _initialize_weights(self):
        gain = nn.init.calculate_gain(nonlinearity='relu')
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight, gain)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.NiN1(x)
        x = self.NiN2(x)
        x = self.NiN3(x)
        x = x.view(-1, self.NUM_CLASSES)
        if not self.training:
            x = F.softmax(x, dim=1)
        return x

    def save_state(self, optimizer, epoch, acc, time=0, tag=None):
        folder = './models_training/cifar_net/NiNnet_wBN_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        state = {
            'epoch': epoch,
            'state_dict': self.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if tag is not None:
            filepath = 'model_NiNnet_wBN_ep{:d}_acc{:.3f}_{:s}'.format(
                epoch, float(acc), str(tag)
            )
        else:
            filepath = 'model_NiNnet_wBN_ep{:d}_acc{:.3f}'.format(
                epoch, float(acc)
            )
        torch.save(state, folder + filepath)

    def save_model(self, acc, time=0):
        folder = './models_trained/cifar_net/NiNnet_wBN_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        filepath = 'model_NiNnet_wBN_acc{:.3f}'.format(float(acc))
        torch.save(self.state_dict(), folder + filepath)


class LiteNiNnet(nn.Module):
    """Trimmed-down NiNnet: two batch-normalized NiN blocks."""

    def __init__(self, NUM_CLASSES=10):
        super().__init__()
        self.NUM_CLASSES = NUM_CLASSES
        self.NiN1 = NiN_wBN(filters=(3,192,160,96), kernels=(5,1,1),
                            pooling_type='max', pooling_stride=2,
                            pooling_kernel=5, dropout=0.5)
        self.NiN2 = NiN_wBN(filters=(96,192,192,self.NUM_CLASSES), kernels=(5,1,1),
                            pooling_type='avg', pooling_stride=2,
                            pooling_kernel=14, dropout=0)
        self._initialize_weights()

    def _initialize_weights(self):
        gain = nn.init.calculate_gain(nonlinearity='relu')
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight, gain)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.NiN1(x)
        x = self.NiN2(x)
        x = x.view(-1, self.NUM_CLASSES)
        if not self.training:
            x = F.softmax(x, dim=1)
        return x

    def save_state(self, optimizer, epoch, acc, time=0, tag=None):
        folder = './models_training/cifar_net/LiteNiNnet_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        state = {
            'epoch': epoch,
            'state_dict': self.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if tag is not None:
            filepath = 'model_LiteNiNnet_ep{:d}_acc{:.3f}_{:s}'.format(
                epoch, float(acc), str(tag)
            )
        else:
            filepath = 'model_LiteNiNnet_ep{:d}_acc{:.3f}'.format(
                epoch, float(acc)
            )
        torch.save(state, folder + filepath)

    def save_model(self, acc, time=0):
        folder = './models_trained/cifar_net/LiteNiNnet_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        filepath = 'model_LiteNiNnet_acc{:.3f}'.format(float(acc))
        torch.save(self.state_dict(), folder + filepath)


class MaxoutNiNnet(nn.Module):
    """NiNnet variant whose last block uses maxout (4 units) activations."""

    def __init__(self, NUM_CLASSES=10):
        super().__init__()
        self.NUM_CLASSES = NUM_CLASSES
        self.NiN1 = NiN_wBN(filters=(3,192,160,96), kernels=(5,1,1),
                            pooling_type='max', pooling_stride=2,
                            pooling_kernel=3, dropout=0.5)
        self.NiN2 = NiN_wBN(filters=(96,192,192,192), kernels=(5,1,1),
                            pooling_type='avg', pooling_stride=2,
                            pooling_kernel=3, dropout=0.5)
        self.NiN3 = MaxoutNiN(filters=(192,192,192,self.NUM_CLASSES), kernels=(3,1,1),
                              pooling_type='avg', pooling_stride=2,
                              pooling_kernel=7, dropout=0, maxout_units=4)
        self._initialize_weights()

    def _initialize_weights(self):
        gain = nn.init.calculate_gain(nonlinearity='relu')
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight, gain)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.NiN1(x)
        x = self.NiN2(x)
        x = self.NiN3(x)
        x = x.view(-1, self.NUM_CLASSES)
        if not self.training:
            x = F.softmax(x, dim=1)
        return x

    def save_state(self, optimizer, epoch, acc, time=0, tag=None):
        folder = './models_training/cifar_net/MaxoutNiNnet_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        state = {
            'epoch': epoch,
            'state_dict': self.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if tag is not None:
            filepath = 'model_MaxoutNiNnet_ep{:d}_acc{:.3f}_{:s}'.format(
                epoch, float(acc), str(tag)
            )
        else:
            filepath = 'model_MaxoutNiNnet_ep{:d}_acc{:.3f}'.format(
                epoch, float(acc)
            )
        torch.save(state, folder + filepath)

    def save_model(self, acc, time=0):
        folder = './models_trained/cifar_net/MaxoutNiNnet_{:d}/'.format(int(time))
        os.makedirs(folder, exist_ok=True)
        filepath = 'model_MaxoutNiNnet_acc{:.3f}'.format(float(acc))
        torch.save(self.state_dict(), folder + filepath)
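

# Smoke test for the model zoo above (an added sketch, not used by the
# training script): push one fake CIFAR-10 batch through each net in eval
# mode and check that it yields one probability row per image.
if __name__ == '__main__':
    _batch = torch.randn(4, 3, 32, 32)
    for _Net in (DenseNet, CNN, FCNN, GoogLeNet, GoogLeNet2, DeepCNN,
                 NiNnet, NiNnet_wBN, LiteNiNnet, MaxoutNiNnet):
        _net = _Net().eval()
        with torch.no_grad():
            _out = _net(_batch)
        assert _out.shape == (4, 10), (_Net.__name__, _out.shape)
        print('{:s}: output {}'.format(_Net.__name__, tuple(_out.shape)))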


# CIFAR-10 training/testing script
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm
from time import time

from cifar_models import *

NUM_EPOCHS = 1
NUM_CLASSES = 10
BATCH_SIZE = 128
NUM_WORKERS = 2
LEARNING_RATE = 0.001
ANNEALING_PERIOD = 15

device = torch.device("cpu")
time = int(time())  # run timestamp used to tag checkpoint folders (shadows time())
def main():
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(1, 1)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                                 transform=train_transform,
                                                 download=True)
    test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                                transform=test_transform,
                                                download=True)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=BATCH_SIZE,
                                               num_workers=NUM_WORKERS,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=BATCH_SIZE,
                                              num_workers=NUM_WORKERS,
                                              shuffle=True)
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    # Pick a model to test:
    # model = GoogLeNet(inc1_depth=(32,32),
    #                   inc2_depth=(32,32),
    #                   inc3_depth=(32,32))
    # model = GoogLeNet2()
    # model = DenseNet()
    # model = CNN()
    # model = DeepCNN()
    # model = NiNnet()
    # model = NiNnet_wBN()
    model = LiteNiNnet()
    # model = MaxoutNiNnet()
    model = model.to(device)
    print(model)

    model.train()
    loss_fun = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max',
                                                     patience=2, factor=0.2,
                                                     cooldown=2, verbose=True)
    # scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
    #                                                  T_max=ANNEALING_PERIOD)

    for epoch in range(NUM_EPOCHS):
        print('\n------- EPOCH {} -------'.format(epoch+1))
        running_loss = 0.0
        print('Learning Rate: {:.2e}'.format(optimizer.param_groups[0]['lr']))
        num_cycles = 0
        total_loss = 0.0

        for i, data in enumerate(tqdm(train_loader, ncols=100)):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = loss_fun(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            total_loss += loss.item()
            if i % 40 == 39:
                # running_loss is summed (not averaged) over the last 40 batches
                tqdm.write('[{:d}, {:d}] loss: {:.2e}'.format(
                    epoch+1, (i+1) * BATCH_SIZE, running_loss))
                running_loss = 0.0

        # Evaluate in eval mode so dropout is disabled and batch norm uses its
        # running statistics, then switch back to train mode afterwards.
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for _, data in enumerate(test_loader):
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                # in eval mode the models already return softmax probabilities
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            acc = correct / total
            print('Loss on training set: %.3e' % (total_loss))  # epoch sum
            print('Accuracy on test set: %.1f %%' % (100 * acc))
            total_loss = 0.0
        model.train()

        model.save_state(optimizer, epoch+1, acc, time)
        scheduler.step(acc)
        if optimizer.param_groups[0]['lr'] < 1e-4*LEARNING_RATE:
            break
        # scheduler.step(epoch=(epoch+1) % ANNEALING_PERIOD)
        # if (epoch+1) % ANNEALING_PERIOD == 0:
        #     num_cycles += 1
        #     model.save_state(optimizer, epoch+1, acc, time,
        #                      tag='cycle{:d}'.format(num_cycles))
        # else:
        #     model.save_state(optimizer, epoch+1, acc, time)

    print('Finished Training\n')

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for _, data in enumerate(test_loader):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    acc = correct / total
    print('Accuracy on test dataset: %.1f %%' % (100 * acc))
    model.save_model(acc, time=time)


if __name__ == '__main__':
    main()
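
# Note: with NUM_EPOCHS = 1 and device = torch.device("cpu") this is a quick
# smoke run; for a real training run, raise NUM_EPOCHS and switch to
# torch.device("cuda" if torch.cuda.is_available() else "cpu").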


# F_activations.py
import torch


def swish(x):
    # functional Swish: x * sigmoid(x)
    p = torch.sigmoid(x)  # F.sigmoid is deprecated; torch.sigmoid is equivalent
    p = p.mul(x)
    return p


def maxout(x, num_units):
    # Fold every `num_units` consecutive channels (dim 1) into one channel by
    # taking an element-wise max. The unit axis must be inserted right after
    # the folded channel dim: appending it at the end of the shape would max
    # over the last spatial axis for 4D conv activations. The channel count
    # also needs integer division so view() receives ints.
    shape = list(x.size())
    shape[1] //= num_units
    shape.insert(2, num_units)  # (N, C//k, k) for 2D or (N, C//k, k, H, W) for 4D
    out, _ = x.view(*shape).max(dim=2)
    return out
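

# Illustrative check of the channel folding (added sketch): with 8 input
# channels and num_units=4, channels 0-3 fold into output channel 0 and
# channels 4-7 into output channel 1.
if __name__ == '__main__':
    _x = torch.randn(2, 8, 4, 4)
    _y = maxout(_x, num_units=4)
    print(_y.shape)  # torch.Size([2, 2, 4, 4])
    assert torch.equal(_y[:, 0], _x[:, 0:4].max(dim=1)[0])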