Created
May 29, 2019 08:18
Star
You must be signed in to star a gist
My approach to using Ax/BoTorch to optimize a CNN for traffic sign classification
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
####################################################### | |
## this code was used in a Jupyter notebook | |
## therefore some jupyter specific lines are commented out | |
#%matplotlib inline | |
#%config InlineBackend.figure_format = 'retina' | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import torch | |
from torch import nn | |
from torch import optim | |
import torch.nn.functional as F | |
import torchvision | |
from torchvision import datasets, transforms, models | |
from ax.plot.contour import plot_contour | |
from ax.plot.trace import optimization_trace_single_method | |
from ax.service.managed_loop import optimize | |
from ax.utils.notebook.plotting import render, init_notebook_plotting | |
from ax.utils.tutorials.cnn_utils import load_mnist, train, evaluate | |
#init_notebook_plotting() | |
# Tensor dtype and compute device used by the rest of the script.
dtype = torch.float
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
##############################################################################
## The data used here is the Belgian Traffic Sign dataset from
## https://btsd.ethz.ch/shareddata/ >> BelgiumTS for Classification (cropped images)
# Directory handed to the data-loading code; adjust to the local copy.
data_dir = '/home/jzornig/data/BelgiumTS/Training/'
def load_split_train_test(datadir, valid_size=.2, batch_size=64,
                          image_size=(57, 57)):
    """Split one image folder into a training and a held-out data loader.

    The images under ``datadir`` are read with ``datasets.ImageFolder``,
    resized to ``image_size`` and converted to tensors. The sample indices
    are shuffled with ``np.random.shuffle`` and the first
    ``floor(valid_size * N)`` of them go to the held-out loader; the rest go
    to the training loader.

    Args:
        datadir: Root directory passed to ``datasets.ImageFolder``.
        valid_size: Fraction of the samples reserved for the held-out
            loader; must lie in ``[0, 1]``.
        batch_size: Batch size used by both loaders (default 64).
        image_size: ``(height, width)`` every image is resized to
            (default ``(57, 57)``).

    Returns:
        A ``(trainloader, testloader)`` tuple of ``DataLoader`` objects.

    Raises:
        ValueError: If ``valid_size`` is outside ``[0, 1]``.
    """
    # Reject impossible fractions early: a value outside [0, 1] would
    # otherwise slice the index list into an empty or wrapped-around split
    # without any error.
    if not 0.0 <= valid_size <= 1.0:
        raise ValueError(
            "valid_size must be between 0 and 1, got {!r}".format(valid_size)
        )

    from torch.utils.data.sampler import SubsetRandomSampler

    train_transforms = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
    ])
    test_transforms = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
    ])

    # Both datasets read the same folder; the samplers below decide which
    # samples each loader actually serves.
    train_data = datasets.ImageFolder(datadir, transform=train_transforms)
    test_data = datasets.ImageFolder(datadir, transform=test_transforms)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))
    np.random.shuffle(indices)

    # The two index lists are disjoint slices of the shuffled indices.
    train_idx, test_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(test_idx)

    trainloader = torch.utils.data.DataLoader(
        train_data, sampler=train_sampler, batch_size=batch_size)
    testloader = torch.utils.data.DataLoader(
        test_data, sampler=test_sampler, batch_size=batch_size)
    return trainloader, testloader
# Build the loaders once at module level; 20% of the images are held out.
train_loader, valid_loader = load_split_train_test(data_dir, valid_size=.2)
######################################### | |
## define an optimization target function | |
## with the model included | |
def train_evaluate(parameterization):
    """Train a fresh ResNet50 head for one trial and return its score.

    A pretrained ResNet50 is loaded, its existing parameters are frozen and
    its ``fc`` layer is replaced by a small classification head. The head is
    trained with Adam and ``NLLLoss`` on ``train_loader`` and the resulting
    network is scored with ``evaluate`` on ``valid_loader``.

    Args:
        parameterization: Mapping of hyperparameter names to values.
            ``'lr'`` (required) is the Adam learning rate; ``'max_epoch'``
            (optional, default 1) is the number of passes over
            ``train_loader``.

    Returns:
        Whatever ``evaluate`` returns for the trained network (presumably
        the classification accuracy -- confirm against
        ``ax.utils.tutorials.cnn_utils``).
    """
    model = models.resnet50(pretrained=True)
    # Freeze the pretrained backbone; only the new head below is trained.
    for param in model.parameters():
        param.requires_grad = False
    model.fc = nn.Sequential(
        nn.Linear(2048, 512),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(512, 80),  # 80 is the number of classes/outputs
        nn.LogSoftmax(dim=1),
    )
    # Batches are moved to `device` below, so the network must live there too.
    model.to(device=device, dtype=dtype)

    # NLLLoss pairs with the LogSoftmax output of the head.
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.fc.parameters(), lr=parameterization['lr'])
    num_epochs = int(parameterization.get('max_epoch', 1))

    # Keep the frozen backbone in eval mode so its BatchNorm statistics are
    # not updated; only the head runs in train mode (Dropout active).
    model.eval()
    model.fc.train()
    for _ in range(num_epochs):
        for inputs, labels in train_loader:
            inputs = inputs.to(device=device, dtype=dtype)
            labels = labels.to(device=device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

    # Score on the held-out loader so the objective is not measured on the
    # same samples the head was trained on.
    return evaluate(
        net=model,
        data_loader=valid_loader,
        dtype=dtype,
        device=device,
    )
######################## | |
## Run the optimization | |
# Run the Ax managed optimization loop over the search space below, using
# train_evaluate as the evaluation function for each trial.
best_parameters, values, experiment, model = optimize(
    parameters=[
        {"name": "lr", "type": "range", "bounds": [1e-6, 0.4], "log_scale": True},
        ### extend this list with other hyperparameters you want to use;
        ### they are forwarded to the evaluation_function
        #{"name": "momentum", "type": "range", "bounds": [0.0, 1.0]},
        #{"name": "max_epoch", "type": "range", "bounds": [1, 30]},
        #{"name": "stepsize", "type": "range", "bounds": [20, 40]},
        #{"name": "batchsize", "type": "range", "bounds": [10, 100]},
    ],
    evaluation_function=train_evaluate,
    objective_name='accuracy',
)
print(best_parameters)
# `values` unpacks into the two objects printed below.
means, covariances = values
print(means)
print(covariances)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello! I'm currently studying deep learning and decided to try hyperparameter optimisation using Ax/BoTorch; after a few attempts I found your gist. I am not very confident in my knowledge yet, and there are a few points in your code that I don't understand. I would be very grateful if you could explain them to me, please:
In line #76 you define the optimiser, but never pass it anywhere
You use the optimize function in line #87, but it looks like you never train your model. In the Ax tutorial, the train_evaluate function calls a train method, but yours doesn't seem to have one, just the evaluation.
Have a good day!