PyTorch Computer Vision Framework (Detailed)
import os

import torch
import torch.utils.data
import torchvision
from torch.backends import cudnn
from torchvision import transforms

# some hyperparameter settings
use_gpu = True
train_batch_size = 64
val_batch_size = 64
if not torch.cuda.is_available():
    use_gpu = False
# First step: prepare your images in a folder-per-class layout, e.g.
# train
# ├── a
# │   └── 1.png
# ├── b
# │   └── 2.png
# └── c
#     ├── 3.png
#     └── 4.png
# Second step: prepare the image dataset
# The following transform pipeline is just a demo
custom_transforms_list = [
    # randomly change the brightness, contrast, saturation and hue of an image;
    # hue should be >= 0 and <= 0.5
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.2),
    # interpolation constants come from PIL.Image:
    # Image.NEAREST=0, Image.ANTIALIAS=1, Image.BILINEAR=2, Image.BICUBIC=3
    # size = (height, width)
    transforms.Resize((224, 224), interpolation=3),
    # torchvision.transforms.Pad(padding, fill=0, padding_mode='constant')
    # fill=(255, 0, 0): red; padding_mode: constant/edge/reflect/symmetric
    # original [0, 1, 2]; reflect [2, 1, 0, 1, 2, 1, 0]; symmetric [1, 0, 0, 1, 2, 2, 1]
    transforms.Pad(10, (255, 255, 255), 'constant'),
    transforms.RandomCrop((224, 224)),
    transforms.RandomHorizontalFlip(),
    # convert a PIL Image to a Tensor: values scaled to [0, 1], dims from (H, W, C) to (C, H, W)
    transforms.ToTensor(),
    # normalization statistics computed on ImageNet
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]
# Compose the custom transforms into a single Compose object
custom_transforms = transforms.Compose(custom_transforms_list)
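# A quick self-contained sanity check of the pipeline (a sketch, not part of the
# original gist): apply it to a synthetic PIL image and confirm the output shape
# is (C, H, W) = (3, 224, 224) after resize -> pad -> random crop -> ToTensor.
from PIL import Image
_demo_img = Image.new('RGB', (320, 240), color=(128, 64, 32))
_demo_tensor = custom_transforms(_demo_img)
assert _demo_tensor.shape == (3, 224, 224)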
# Load the image dataset
image_dataset = torchvision.datasets.ImageFolder(
    # note: '~' is not expanded automatically, so expand it explicitly
    root=os.path.expanduser("~/data/classified_imgs"),
    # a function/transform that takes in a PIL image and returns a transformed version
    transform=custom_transforms,
    # a function/transform that takes in the label and transforms it
    target_transform=None,
    # a function to load an image given its path
    # loader=PIL.Image.open,
)
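# ImageFolder derives the class labels from the sorted subfolder names; a small
# sketch to inspect the mapping (the printed values are examples, assuming the
# folders a/b/c from the layout above):
print(image_dataset.classes)        # e.g. ['a', 'b', 'c']
print(image_dataset.class_to_idx)   # e.g. {'a': 0, 'b': 1, 'c': 2}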
# Create an image loader that builds batches and speeds up loading
# DataLoader: combines a dataset and a sampler, and provides single- or multi-process
# iterators over the dataset
image_loader = torch.utils.data.DataLoader(
    image_dataset,
    batch_size=train_batch_size,
    # set to True to have the data reshuffled at every epoch; default is False
    shuffle=True,
    # how many subprocesses to use for data loading; 0 means the data is loaded
    # in the main process
    num_workers=4,
    # if True, the data loader copies tensors into CUDA pinned memory before returning them;
    # pinned memory always stays in physical memory and is never swapped out
    pin_memory=False,
    # if True, drop the last incomplete batch
    drop_last=False,
    # if positive, the timeout in seconds for collecting a batch from workers; must be non-negative
    timeout=0)
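# Sketch (not in the original gist): pull a single batch to confirm the loader's
# output shapes — images: [batch, 3, 224, 224], labels: [batch]
_imgs, _lbls = next(iter(image_loader))
print(_imgs.shape, _lbls.shape)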
# Validation dataset loader. Here we leave it as None.
val_dataset_loader = None
# number of validation samples (placeholder to avoid division by zero below)
val_dataset_size = 1
# Third step: create your model; here we use ResNet50
# ToDo: maybe we can create a custom deep-learning network
model = torchvision.models.resnet50(pretrained=False)
# Move the model to CUDA memory
if use_gpu:
    model = model.cuda()
    cudnn.benchmark = True
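# Sketch: count the trainable parameters to gauge model size
# (ResNet50 has roughly 25.6M parameters):
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'trainable parameters: {num_params:,}')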
# Fourth step: create your criterion,
# e.g. classification: torch.nn.CrossEntropyLoss; regression: torch.nn.MSELoss
# ToDo: you can define your own customized loss function
# !!! Note: if you use CrossEntropyLoss as the criterion, you needn't (and shouldn't)
# add a softmax layer at the end of your model
criterion = torch.nn.CrossEntropyLoss()
if use_gpu:
    criterion = criterion.cuda()
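# Sketch illustrating the note above: CrossEntropyLoss applies log-softmax
# internally, so it expects raw logits of shape [batch, num_classes] plus integer
# labels of shape [batch] (a toy CPU example, not part of the training loop):
_logits = torch.randn(4, 1000)
_targets = torch.tensor([1, 0, 3, 2])
print(torch.nn.CrossEntropyLoss()(_logits, _targets))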
# Fifth step: create your optimizer.
# If you need to move a model to the GPU via .cuda(), do so before constructing
# optimizers for it: parameters of a model after .cuda() are different objects
# from those before the call.
# ToDo: you can set different hyperparameters for different parameter groups,
# as sketched below
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.01)
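# Sketch of the per-group idea above (an assumption, not in the original gist):
# e.g. a smaller learning rate for the backbone than for the final fc layer:
#   optimizer = torch.optim.RMSprop([
#       {'params': model.fc.parameters(), 'lr': 0.01},
#       {'params': [p for n, p in model.named_parameters()
#                   if not n.startswith('fc')], 'lr': 0.001},
#   ])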
# Sixth step: now that you have the dataset loader, model, criterion and optimizer,
# you can train your model
# define how many epochs you want to train
epochs = 10
# get the number of steps per epoch
steps_per_epoch = len(image_loader)
for epoch in range(epochs):
    # ToDo: you can write your own custom code to show the training progress
    print(f'Epoch {epoch+1:02d} / {epochs:02d}')
    print('-' * 30)
    # One epoch: train on the training data, then evaluate on the validation dataset if it exists
    # You may set some variables to record results, e.g. loss values and accuracy.
    train_loss, val_loss, val_accuracy = 0.0, 0.0, 0.0
    # Training phase
    model.train()
    for step, (input_imgs, labels) in enumerate(image_loader):
        # show the current step count within the epoch (overwrite the same line)
        print(f"\tstep {step+1}/{steps_per_epoch}", end='\r')
        if use_gpu:
            input_imgs = input_imgs.cuda()
            labels = labels.cuda()
        # the critical 5 steps of training
        optimizer.zero_grad()
        outputs = model(input_imgs)
        # criterion(outputs, labels) is the mean loss of this batch
        step_loss = criterion(outputs, labels)
        # backpropagate the loss value
        step_loss.backward()
        # update the parameters of the model
        optimizer.step()
        # use .item() so we accumulate a Python float, not a graph-holding tensor
        train_loss += step_loss.item()
    train_loss /= steps_per_epoch
    print(f"train loss: {train_loss}")
    # Validation phase
    if val_dataset_loader is not None:
        print('beginning the validation process ...')
        model.eval()
        with torch.set_grad_enabled(False):
            for input_imgs, labels in val_dataset_loader:
                if use_gpu:
                    input_imgs = input_imgs.cuda()
                    labels = labels.cuda()
                prediction = model(input_imgs)
                val_step_loss = criterion(prediction, labels)
                val_loss += val_step_loss.item()
                val_accuracy += (prediction.max(dim=1)[1] == labels).sum().item()
        val_loss /= len(val_dataset_loader)
        val_accuracy = float(val_accuracy) / val_dataset_size
        print(f'val loss: {val_loss}, val accuracy: {val_accuracy}')
# Congratulations, you now have a trained model
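# Sketch: persist the trained weights with torch.save (the filename here is an
# example, not from the original gist):
torch.save(model.state_dict(), 'resnet50_trained.pth')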