Skip to content

Instantly share code, notes, and snippets.

@preritg
Created April 26, 2020 00:34
Show Gist options
  • Save preritg/db5ee445eb89e7a5ac256949cdf9e87a to your computer and use it in GitHub Desktop.
Save preritg/db5ee445eb89e7a5ac256949cdf9e87a to your computer and use it in GitHub Desktop.
code to train a NN from scratch to classify dog breeds
import os
import torch
from torchvision import datasets
import torchvision.transforms as transforms
### Data loaders for the training, validation, and test sets
batch_size = 32
num_workers = 0

# Per-channel normalization shared by every split. These are the standard
# ImageNet statistics; the red-channel mean is 0.485 (it was previously
# mistyped as 0.486 in both pipelines).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training pipeline: fixed 224x224 resize plus a random flip as augmentation.
# NOTE(review): a vertical flip yields upside-down images, which the valid/test
# pipelines never produce -- consider RandomHorizontalFlip instead.
data_transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
    normalize,
])

# Evaluation pipeline (validation and test): deterministic, no augmentation.
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

train_dataset = datasets.ImageFolder(
    '/content/gdrive/My Drive/udacity_data/dogImages/train/',
    transform=data_transforms_train)
valid_dataset = datasets.ImageFolder(
    '/content/gdrive/My Drive/udacity_data/dogImages/valid/',
    transform=data_transforms)
test_dataset = datasets.ImageFolder(
    '/content/gdrive/My Drive/udacity_data/dogImages/test/',
    transform=data_transforms)

# Only the training split is shuffled; validation and test keep a fixed order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
valid_loader = torch.utils.data.DataLoader(
    valid_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)

loaders_scratch = {
    'train': train_loader,
    'test': test_loader,
    'valid': valid_loader,
}

# check if CUDA is available
use_cuda = torch.cuda.is_available()
import torch.nn as nn
import torch.nn.functional as F
# define the CNN architecture
class Net(nn.Module):
    """Convolutional classifier producing 133 output scores per image.

    Four 3x3 convolutions (padding 1, so spatial size is preserved) are each
    followed by ReLU and a 2x2 max pool, which halves height and width. The
    first fully connected layer expects 256*14*14 features, i.e. a 224x224
    input after four halvings. Dropout (p=0.5) is applied before ``fc1`` and
    before ``fc2``.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: 3 -> 32 -> 64 -> 128 -> 256 channels.
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
        # Classifier head: flattened features -> 2048 -> 1024 -> 133 scores.
        self.fc1 = nn.Linear(256*14*14, 2048)
        self.fc2 = nn.Linear(2048, 1024)
        self.fc3 = nn.Linear(1024, 133)
        # Shared, parameter-free layers reused at several points in forward().
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Return the raw (un-normalized) class scores for a batch of images.

        Args:
            x: image batch of shape (N, 3, H, W); H = W = 224 is required for
                the flattened features to match ``fc1``.

        Returns:
            Tensor of shape (N, 133).

        Raises:
            RuntimeError: if the input resolution does not yield 256*14*14
                features per sample.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        # Flatten per sample. Keeping the batch dimension fixed (instead of
        # view(-1, features)) makes a wrong input size fail loudly in fc1
        # rather than silently regrouping samples across the batch.
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
#-#-# You do NOT have to modify the code below this line. #-#-#
import torch.optim as optim

# Build the scratch CNN and place it on the GPU when CUDA was detected.
model_scratch = Net()
if use_cuda:
    model_scratch = model_scratch.cuda()

# Loss function: cross entropy computed on the network's raw outputs.
criterion_scratch = nn.CrossEntropyLoss()

# Optimizer: SGD over every parameter of the scratch model.
optimizer_scratch = optim.SGD(params=model_scratch.parameters(), lr=0.001)
%%time
import numpy as np
# the following import is required for training to be robust to truncated images
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """Train ``model`` and checkpoint it whenever the validation loss improves.

    Args:
        n_epochs: number of epochs to run.
        loaders: mapping with ``'train'`` and ``'valid'`` entries, each an
            iterable yielding ``(data, target)`` batches.
        model: the module to optimize; it is updated in place.
        optimizer: optimizer already bound to ``model``'s parameters.
        criterion: callable ``criterion(output, target)`` returning a scalar
            loss tensor.
        use_cuda: when true, every batch is moved to the GPU before use.
        save_path: file that receives ``model.state_dict()`` each time the
            epoch's average validation loss drops below the best seen so far.

    Returns:
        The same ``model`` object in the state reached after the last epoch
        (left in eval mode). The best checkpoint is only on disk at
        ``save_path``.
    """
    # float("inf") instead of np.Inf: that alias was removed in NumPy 2.0.
    valid_loss_min = float("inf")

    for epoch in range(1, n_epochs + 1):
        # Running averages of the per-batch losses for this epoch.
        train_loss = 0.0
        valid_loss = 0.0

        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            # .item() yields a plain Python float, so the running average does
            # not stay on the GPU or hold on to tensor storage.
            batch_loss = loss.item()
            print("Batch #{} Training loss: {:.6f}".format(batch_idx, batch_loss))
            # Incremental mean: avg += (x - avg) / count
            train_loss += (batch_loss - train_loss) / (batch_idx + 1)

        ######################
        # validate the model #
        ######################
        model.eval()
        # No gradients are needed for evaluation; skipping graph construction
        # saves memory and time.
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(loaders['valid']):
                if use_cuda:
                    data, target = data.cuda(), target.cuda()
                output = model(data)
                loss = criterion(output, target)
                valid_loss += (loss.item() - valid_loss) / (batch_idx + 1)

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch,
            train_loss,
            valid_loss
            ))

        # Save the model if the validation loss has decreased.
        if valid_loss < valid_loss_min:
            print("Validation loss has decreased, saving model....")
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss

    # return trained model
    return model
# Train the scratch model for 7 epochs, checkpointing to model_scratch.pt.
model_scratch = train(
    n_epochs=7,
    loaders=loaders_scratch,
    model=model_scratch,
    optimizer=optimizer_scratch,
    criterion=criterion_scratch,
    use_cuda=use_cuda,
    save_path='model_scratch.pt',
)

# Restore the checkpointed weights, i.e. the epoch with the lowest
# validation loss.
best_state = torch.load('model_scratch.pt')
model_scratch.load_state_dict(best_state)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment