Skip to content

Instantly share code, notes, and snippets.

@foolishflyfox
Last active December 24, 2018 07:40
Show Gist options
  • Save foolishflyfox/3ea9f6f7efcef88edc5b73a22066dfb1 to your computer and use it in GitHub Desktop.
Save foolishflyfox/3ea9f6f7efcef88edc5b73a22066dfb1 to your computer and use it in GitHub Desktop.
# PyTorch Computer Vision Framework (Detailed)
import torch
import torchvision
import torch.utils.data
from torch.backends import cudnn
from torchvision import transforms
# Hyperparameter settings.
# `use_gpu` is the requested device preference; it is switched off below when
# no CUDA device is available so the rest of the script can branch on it safely.
use_gpu = True
train_batch_size = 64
val_batch_size = 64
if not torch.cuda.is_available():
    use_gpu = False
# First step: arrange your images with one sub-directory per class, e.g.
# train
# ├── a
# │   └── 1.png
# ├── b
# │   └── 2.png
# └── c
#     ├── 3.png
#     └── 4.png

# Second step: build the image dataset.
# The pipeline below is only a demo; adapt it to your data.
custom_transforms_list = [
    # Random photometric augmentation: brightness, contrast, saturation, hue.
    # hue must lie in [0, 0.5].
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.2),
    # size is (height, width). The interpolation codes come from PIL.Image:
    # NEAREST=0, ANTIALIAS=1, BILINEAR=2, BICUBIC=3.
    transforms.Resize(size=(224, 224), interpolation=3),
    # Signature: torchvision.transforms.Pad(padding, fill=0, padding_mode='constant').
    # fill is an RGB colour, e.g. (255, 0, 0) is red; here the border is white.
    # padding_mode is one of constant / edge / reflect / symmetric. For the
    # sequence [0, 1, 2] padded by 2 on each side:
    #   reflect   -> [2, 1, 0, 1, 2, 1, 0]
    #   symmetric -> [1, 0, 0, 1, 2, 2, 1]
    transforms.Pad(padding=10, fill=(255, 255, 255), padding_mode='constant'),
    # Crop back to 224x224 at a random offset inside the padded image.
    transforms.RandomCrop(size=(224, 224)),
    transforms.RandomHorizontalFlip(),
    # PIL image -> tensor: values scaled to [0, 1], layout (H, W, C) -> (C, H, W).
    transforms.ToTensor(),
    # Per-channel mean / std taken from ImageNet.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
# Chain the individual transforms into a single callable.
custom_transforms = transforms.Compose(transforms=custom_transforms_list)

# Load the image dataset from the class-per-folder directory.
image_dataset = torchvision.datasets.ImageFolder(
    root="~/data/classified_imgs",
    # Callable applied to each loaded PIL image; returns the transformed version.
    transform=custom_transforms,
    # Callable applied to each label; None leaves the label unchanged.
    target_transform=None,
    # An optional `loader` callable (path -> image) may also be supplied,
    # e.g. loader=PIL.Image.open.
)
# Create a loader that batches the dataset and speeds up loading.
# DataLoader combines a dataset and a sampler, and provides single- or
# multi-process iterators over the dataset.
image_loader = torch.utils.data.DataLoader(
    image_dataset,
    # Use the hyperparameter defined at the top of the script instead of a
    # second hard-coded copy of the same number.
    batch_size=train_batch_size,
    # Set to True to have the data reshuffled at every epoch (default: False).
    shuffle=True,
    # Number of subprocesses used for data loading; 0 means the data is loaded
    # in the main process.
    num_workers=4,
    # If True, the loader copies tensors into CUDA pinned memory before
    # returning them. Pinned memory always stays in physical memory and is
    # never swapped out.
    pin_memory=False,
    # If True, drop the last incomplete batch.
    drop_last=False,
    # Timeout for collecting a batch from the workers; must be non-negative.
    timeout=0,
)

# Validation dataset loader. It is left as None here, which makes the training
# loop skip the validation phase.
val_dataset_loader = None
val_dataset_size = 1
# Third step: create your model; here we use ResNet-50 without pretrained weights.
# ToDo: a custom deep-learning network could be plugged in here instead.
model = torchvision.models.resnet50(pretrained=False)
if use_gpu:
    # .cuda() copies the model to CUDA memory and returns it.
    model = model.cuda()
    # Enable the cuDNN benchmark mode; only relevant when running on the GPU.
    cudnn.benchmark = True
# Fourth step: create your criterion (loss function),
# e.g. classification: torch.nn.CrossEntropyLoss; regression: torch.nn.MSELoss.
# ToDo: you can define your own customized loss function.
# !!! Note: if you use CrossEntropyLoss as the criterion, you needn't (and
# shouldn't) put a softmax layer at the end of your model.
criterion = torch.nn.CrossEntropyLoss()
if use_gpu:
    criterion = criterion.cuda()
# Fifth step: create your optimizer.
# A model that is moved to the GPU via .cuda() must be moved BEFORE the
# optimizer is constructed: the parameters after .cuda() are different objects
# from those before the call.
# ToDo: different parameter groups can be given different settings.
optimizer = torch.optim.RMSprop(params=model.parameters(), lr=0.01)
# Sixth step: with the data loader, model, criterion and optimizer in place,
# train the model.

# Number of passes over the training data.
epochs = 10
# Number of batches (optimizer steps) in one epoch.
steps_per_epoch = len(image_loader)

for epoch in range(epochs):
    # ToDo: replace with your own progress reporting if desired.
    # Report "current / total" with a one-based epoch number.
    print(f'Epoch {epoch + 1:02d} / {epochs:02d}')
    print('-' * 30)

    # Per-epoch accumulators, kept as plain Python floats.
    train_loss, val_loss, val_accuracy = 0.0, 0.0, 0.0

    # ---- Training phase ----
    # Put layers such as BatchNorm into training mode (the validation phase
    # below switches them to evaluation mode).
    model.train()
    for step, (input_imgs, labels) in enumerate(image_loader):
        # Show the current step; end='\r' rewrites the same console line.
        print(f"\tstep {step + 1}/{steps_per_epoch}", end='\r')
        if use_gpu:
            input_imgs = input_imgs.cuda()
            labels = labels.cuda()
        # The five critical steps of a training iteration:
        # 1. clear gradients left over from the previous step
        optimizer.zero_grad()
        # 2. forward pass
        outputs = model(input_imgs)
        # 3. loss; criterion(outputs, labels) is the mean loss of this batch
        step_loss = criterion(outputs, labels)
        # 4. back-propagate the loss
        step_loss.backward()
        # 5. update the model parameters
        optimizer.step()
        # .item() extracts a Python float, so the running total does not hold
        # on to tensors (and their autograd history) across iterations.
        train_loss += step_loss.item()
    # Finish the in-place progress line before printing the summary.
    print()
    train_loss /= steps_per_epoch
    print(f"train loss: {train_loss}")

    # ---- Validation phase (skipped when no validation loader is set) ----
    if val_dataset_loader is not None:
        print('begin to validation process ...')
        # Evaluation mode: BatchNorm uses its running statistics.
        model.eval()
        val_correct, val_samples = 0, 0
        with torch.set_grad_enabled(False):
            for input_imgs, labels in val_dataset_loader:
                # Inputs must live on the same device as the model.
                if use_gpu:
                    input_imgs = input_imgs.cuda()
                    labels = labels.cuda()
                prediction = model(input_imgs)
                val_loss += criterion(prediction, labels).item()
                # Index of the highest score along dim 1 is the predicted class.
                val_correct += (prediction.max(dim=1)[1] == labels).sum().item()
                val_samples += labels.size(0)
        # max(..., 1) guards against an empty validation loader.
        val_loss /= max(len(val_dataset_loader), 1)
        # Divide by the number of samples actually evaluated rather than by a
        # separately maintained size constant.
        val_accuracy = val_correct / max(val_samples, 1)
        print(f'val loss: {val_loss}, val accuracy: {val_accuracy}')

# Congratulations, you now have a trained model.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment