@koshian2
Last active May 23, 2019 13:12
PyTorch FP32
import torch
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from pytorch_models import Layer10CNN, WideResNet
import numpy as np
import datetime
import time
import pickle

def dataloaders(batch_size):
    # torchvision's ToTensor already scales images to [0, 1]
    trans = transforms.Compose([
        transforms.ToTensor()
    ])
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=trans)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=1)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=trans)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False, num_workers=1)
    return trainloader, testloader

def train(batch_size, network, use_device):
    if network == 0:
        model = Layer10CNN()
    elif network == 1:
        model = WideResNet()
    device = "cuda"
    # num_workers=4, benchmark=True  -> 18.45s
    # num_workers=2, benchmark=True  -> 15.75s
    # num_workers=1, benchmark=True  -> 14.2s
    # num_workers=1, benchmark=False -> 19.83s
    # multi-GPU, workers=1 -> 12.54s
    # multi-GPU, workers=2 -> 14.1s
    torch.backends.cudnn.benchmark = True
    model = model.cuda()
    if use_device == "multigpu":
        model = torch.nn.DataParallel(model)
    train_loader, test_loader = dataloaders(batch_size)
    criterion = torch.nn.CrossEntropyLoss()
    # Scale the base learning rate (0.1 at batch size 128) linearly with batch size
    initial_lr = 0.1 * batch_size / 128
    optimizer = optim.SGD(model.parameters(), lr=initial_lr, momentum=0.9)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50, 80], gamma=0.1)

    result = {}
    result["train_begin"] = datetime.datetime.now()
    result["times"] = []
    result["val_acc"] = []
    result["loss"] = []

    for epoch in range(100):
        start_time = time.time()
        # train
        train_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= i + 1  # per-batch loss
        scheduler.step()  # advance the MultiStepLR schedule once per epoch
        # Validation
        with torch.no_grad():
            correct, total = 0, 0
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, pred = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (pred == labels).sum().item()
            val_acc = correct / total
        # log
        elapsed = time.time() - start_time
        result["times"].append(elapsed)
        result["loss"].append(train_loss)
        result["val_acc"].append(val_acc)
        print(f"Epoch {epoch+1} loss = {train_loss:.06} val_acc = {val_acc:.04} | {elapsed:0.4}s")

    result["train_end"] = datetime.datetime.now()
    with open(f"result/{use_device}_{network}_{batch_size}.pkl", "wb") as fp:
        pickle.dump(result, fp)

def train_gpus():
    for network in [0, 1]:
        for device in ["gpu", "multigpu"]:
            for batch in [128, 256, 512, 1024, 2048]:
                # WideResNet (network 1) is only run at the smaller batch sizes
                if device == "gpu":
                    if network == 1 and batch > 256: continue
                if device == "multigpu":
                    if network == 1 and batch > 512: continue
                train(batch, network, device)
                time.sleep(60)

if __name__ == "__main__":
    train_gpus()
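
To inspect a finished run, the pickled result dict can be loaded back afterwards. This is a minimal sketch: the file name below (`result/gpu_0_128.pkl`, i.e. single GPU, the 10-layer CNN, batch size 128) is just one example of the pattern written by `train()` above, and the keys match the `result` dict it builds.

import pickle
import statistics

# Hypothetical example file: single GPU, network 0 (10-layer CNN), batch size 128
with open("result/gpu_0_128.pkl", "rb") as fp:
    result = pickle.load(fp)

print("best val_acc     :", max(result["val_acc"]))
print("final train loss :", result["loss"][-1])
print("median epoch time:", statistics.median(result["times"]), "s")
print("total wall clock :", result["train_end"] - result["train_begin"])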

# pytorch_models.py: model definitions imported by the training script above
import torch
import torch.nn as nn
import torch.nn.functional as F

# 10-layer CNN
class Layer10CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.block1 = self.create_block(3, 64, 3, False)
        self.block2 = self.create_block(64, 128, 3, True)
        self.block3 = self.create_block(128, 256, 3, True)
        self.pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(256, 10)

    def create_block(self, in_ch, out_ch, reps, initial_pool):
        # (Conv -> BN -> ReLU) x reps, optionally preceded by 2x2 average pooling
        layers = []
        if initial_pool:
            layers.append(nn.AvgPool2d(2))
        for i in range(reps):
            in_n = in_ch if i == 0 else out_ch
            layers.append(nn.Conv2d(in_n, out_ch, 3, padding=1))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ReLU(inplace=True))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.block1(x)
        out = self.block2(out)
        out = self.block3(out)
        out = self.pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

# WideResNet 28-10
class ResidualBlock(nn.Module):
    def __init__(self, in_ch, ch, stride, conv_before_skip):
        super().__init__()
        self.conv_before_skip = conv_before_skip
        if conv_before_skip:
            # 1x1 projection on the skip path when channels or resolution change
            self.skip_conv = nn.Conv2d(in_ch, ch, 1, stride=stride)
            self.skip_bn = nn.BatchNorm2d(ch)
        self.main_conv1 = nn.Conv2d(in_ch, ch, 3, stride=stride, padding=1)
        self.main_bn1 = nn.BatchNorm2d(ch)
        self.main_conv2 = nn.Conv2d(ch, ch, 3, padding=1)
        self.main_bn2 = nn.BatchNorm2d(ch)

    def forward(self, x):
        if self.conv_before_skip:
            skip = F.relu(self.skip_bn(self.skip_conv(x)), True)
        else:
            skip = x
        main = F.relu(self.main_bn1(self.main_conv1(x)), True)
        main = F.relu(self.main_bn2(self.main_conv2(main)), True)
        return main + skip

class WideResNet(nn.Module):
    def __init__(self, N=4, k=10):
        super().__init__()
        self.initial_conv = nn.Conv2d(3, 16, 3, padding=1)
        self.initial_bn = nn.BatchNorm2d(16)
        self.block1 = self.create_residual_blocks(16, 16 * k, N, 1)
        self.block2 = self.create_residual_blocks(16 * k, 32 * k, N, 2)
        self.block3 = self.create_residual_blocks(32 * k, 64 * k, N, 2)
        self.pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64 * k, 10)

    def create_residual_blocks(self, in_ch, out_ch, N, stride):
        layers = []
        for i in range(N):
            if i == 0:
                layers.append(ResidualBlock(in_ch, out_ch, stride, True))
            else:
                layers.append(ResidualBlock(out_ch, out_ch, 1, False))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.initial_bn(self.initial_conv(x)), True)
        out = self.block1(out)
        out = self.block2(out)
        out = self.block3(out)
        out = self.pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
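
A quick, optional smoke test (a sketch, not part of the benchmark) to confirm that both models accept CIFAR-10-sized input and emit 10 logits, and to compare their parameter counts:

if __name__ == "__main__":
    # Feed a small random batch of 32x32 RGB images through both models
    x = torch.randn(4, 3, 32, 32)
    for model in [Layer10CNN(), WideResNet()]:
        out = model(x)
        n_params = sum(p.numel() for p in model.parameters())
        print(type(model).__name__, tuple(out.shape), f"{n_params:,} params")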