Skip to content

Instantly share code, notes, and snippets.

@DanWaxman
Created November 7, 2023 13:43
Show Gist options
  • Save DanWaxman/b98d0c5b11b7badd597d4029ee59619d to your computer and use it in GitHub Desktop.
Save DanWaxman/b98d0c5b11b7badd597d4029ee59619d to your computer and use it in GitHub Desktop.
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import time
######
# Adapted from https://github.com/kuangliu/pytorch-cifar
#####
device = 'cuda' if torch.cuda.is_available() else 'cpu'
def swish(x):
return x * x.sigmoid()
def drop_connect(x, drop_ratio):
keep_ratio = 1.0 - drop_ratio
mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
mask.bernoulli_(keep_ratio)
x.div_(keep_ratio)
x.mul_(mask)
return x
class SE(nn.Module):
'''Squeeze-and-Excitation block with Swish.'''
def __init__(self, in_channels, se_channels):
super(SE, self).__init__()
self.se1 = nn.Conv2d(in_channels, se_channels,
kernel_size=1, bias=True)
self.se2 = nn.Conv2d(se_channels, in_channels,
kernel_size=1, bias=True)
def forward(self, x):
out = F.adaptive_avg_pool2d(x, (1, 1))
out = swish(self.se1(out))
out = self.se2(out).sigmoid()
out = x * out
return out
class Block(nn.Module):
'''expansion + depthwise + pointwise + squeeze-excitation'''
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
expand_ratio=1,
se_ratio=0.,
drop_rate=0.):
super(Block, self).__init__()
self.stride = stride
self.drop_rate = drop_rate
self.expand_ratio = expand_ratio
# Expansion
channels = expand_ratio * in_channels
self.conv1 = nn.Conv2d(in_channels,
channels,
kernel_size=1,
stride=1,
padding=0,
bias=False)
self.bn1 = nn.BatchNorm2d(channels)
# Depthwise conv
self.conv2 = nn.Conv2d(channels,
channels,
kernel_size=kernel_size,
stride=stride,
padding=(1 if kernel_size == 3 else 2),
groups=channels,
bias=False)
self.bn2 = nn.BatchNorm2d(channels)
# SE layers
se_channels = int(in_channels * se_ratio)
self.se = SE(channels, se_channels)
# Output
self.conv3 = nn.Conv2d(channels,
out_channels,
kernel_size=1,
stride=1,
padding=0,
bias=False)
self.bn3 = nn.BatchNorm2d(out_channels)
# Skip connection if in and out shapes are the same (MV-V2 style)
self.has_skip = (stride == 1) and (in_channels == out_channels)
def forward(self, x):
out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x)))
out = swish(self.bn2(self.conv2(out)))
out = self.se(out)
out = self.bn3(self.conv3(out))
if self.has_skip:
if self.training and self.drop_rate > 0:
out = drop_connect(out, self.drop_rate)
out = out + x
return out
class EfficientNet(nn.Module):
def __init__(self, cfg, num_classes=10):
super(EfficientNet, self).__init__()
self.cfg = cfg
self.conv1 = nn.Conv2d(3,
32,
kernel_size=3,
stride=1,
padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(32)
self.layers = self._make_layers(in_channels=32)
self.linear = nn.Linear(cfg['out_channels'][-1], num_classes)
def _make_layers(self, in_channels):
layers = []
cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks', 'kernel_size',
'stride']]
b = 0
blocks = sum(self.cfg['num_blocks'])
for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
strides = [stride] + [1] * (num_blocks - 1)
for stride in strides:
drop_rate = self.cfg['drop_connect_rate'] * b / blocks
layers.append(
Block(in_channels,
out_channels,
kernel_size,
stride,
expansion,
se_ratio=0.25,
drop_rate=drop_rate))
in_channels = out_channels
return nn.Sequential(*layers)
def forward(self, x):
out = swish(self.bn1(self.conv1(x)))
out = self.layers(out)
out = F.adaptive_avg_pool2d(out, 1)
out = out.view(out.size(0), -1)
dropout_rate = self.cfg['dropout_rate']
if self.training and dropout_rate > 0:
out = F.dropout(out, p=dropout_rate)
out = self.linear(out)
return out
def EfficientNetB0():
cfg = {
'num_blocks': [1, 2, 2, 3, 3, 4, 1],
'expansion': [1, 6, 6, 6, 6, 6, 6],
'out_channels': [16, 24, 40, 80, 112, 192, 320],
'kernel_size': [3, 3, 5, 3, 5, 5, 3],
'stride': [1, 2, 2, 2, 1, 2, 1],
'dropout_rate': 0.2,
'drop_connect_rate': 0.2,
}
return EfficientNet(cfg)
print('Using device ' + device)
print('Preparing Data')
transform_train = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
transform_test = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
trainset = torchvision.datasets.CIFAR10(
root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
trainset, batch_size=128, shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR10(
root='./data', train=False, download=True, transform=transform_train)
testloader = torch.utils.data.DataLoader(
testset, batch_size=100, shuffle=True, num_workers=2)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
net = EfficientNetB0().to(device)
loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
start_time = time.time()
print(f'Training Start Time: {start_time}')
for epoch in range(10):
print(f'Epoch {epoch}')
net.train()
for batch_idx, (inputs, targets) in enumerate(trainloader):
inputs, targets = inputs.to(device), targets.to(device)
optimizer.zero_grad()
outputs = net(inputs)
l = loss(outputs, targets)
l.backward()
optimizer.step()
net.eval()
test_loss = 0
with torch.no_grad():
for batch_idx, (inputs, targets) in enumerate(testloader):
inputs, targets = inputs.to(device), targets.to(device)
outputs = net(inputs)
l = loss(outputs, targets)
test_loss += l.item()
print(f'Test loss = {test_loss}')
scheduler.step()
end_time = time.time()
print(f'Training End Time: {end_time}')
print(f'Total Training Time: {end_time - start_time}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment