# pinkerltm/ResNet50_BO.py

## ResNet50_BO.py
#######################################################
## this code was used in a Jupyter notebook
## therefore some jupyter specific lines are commented out


#%matplotlib inline
#%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms, models

from ax.plot.contour import plot_contour
from ax.plot.trace import optimization_trace_single_method
from ax.service.managed_loop import optimize
from ax.utils.notebook.plotting import render, init_notebook_plotting
from ax.utils.tutorials.cnn_utils import load_mnist, train, evaluate

#init_notebook_plotting()

# Tensor precision shared by the model and the input batches.
dtype = torch.float32
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

##############################################################################
## The data used here is the Belgian Traffic Sign dataset from
## https://btsd.ethz.ch/shareddata/ >> BelgiumTS for Classification (cropped images)

# Location of the training images; machine-specific, adjust before running.
data_dir = '/home/jzornig/data/BelgiumTS/Training/'


def load_split_train_test(datadir, valid_size=.2):
    """Build a training loader and a held-out loader over one image folder.

    Each image is resized to 57x57 and converted to a tensor. The sample
    indices are shuffled with NumPy's global RNG; the first ``valid_size``
    fraction feeds the held-out loader and the remainder the training loader.

    Args:
        datadir: Directory passed to ``datasets.ImageFolder``.
        valid_size: Fraction of the samples reserved for the held-out split.

    Returns:
        Tuple ``(trainloader, testloader)``; both use a batch size of 64.
    """
    from torch.utils.data.sampler import SubsetRandomSampler

    def build_dataset():
        # Same preprocessing for both splits: fixed-size resize, then tensor.
        pipeline = transforms.Compose([
            transforms.Resize((57, 57)),
            transforms.ToTensor(),
        ])
        return datasets.ImageFolder(datadir, transform=pipeline)

    train_data = build_dataset()
    test_data = build_dataset()

    sample_count = len(train_data)
    indices = list(range(sample_count))
    split = int(np.floor(valid_size * sample_count))
    np.random.shuffle(indices)

    # The head of the shuffled list is held out, the tail is trained on.
    held_out_idx = indices[:split]
    fit_idx = indices[split:]

    trainloader = torch.utils.data.DataLoader(
        train_data,
        sampler=SubsetRandomSampler(fit_idx),
        batch_size=64,
    )
    testloader = torch.utils.data.DataLoader(
        test_data,
        sampler=SubsetRandomSampler(held_out_idx),
        batch_size=64,
    )
    return trainloader, testloader

# Split the image folder once at module load: 20% held out, the rest for training.
train_loader, valid_loader = load_split_train_test(data_dir, valid_size=.2)

#########################################
## define an optimization target function
## with the model included

def train_evaluate(parameterization):
    """Train a fresh ResNet50 classification head and return its accuracy.

    This is the evaluation function handed to Ax's ``optimize``. A pretrained
    ResNet50 is loaded, its backbone is frozen, and a new head is trained on
    ``train_loader`` with Adam at the proposed learning rate. The trained
    network is then scored on the held-out ``valid_loader``.

    Args:
        parameterization: Dict of hyperparameter values proposed by Ax.
            ``lr`` is required. ``max_epoch`` is optional and defaults to a
            single pass over the training data.

    Returns:
        The value produced by Ax's ``evaluate`` helper on ``valid_loader``,
        used as the ``accuracy`` objective.
    """
    model = models.resnet50(pretrained=True)
    # Freeze the pretrained backbone so only the new head is optimised
    # (the VGG19 variant of this experiment left these weights trainable).
    for param in model.parameters():
        param.requires_grad = False

    model.fc = nn.Sequential(nn.Linear(2048, 512),
                             nn.ReLU(),
                             nn.Dropout(0.2),
                             nn.Linear(512, 80),  # 80 is the number of classes/outputs
                             nn.LogSoftmax(dim=1))
    # Move the weights explicitly so they sit on the same device as the batches.
    model.to(dtype=dtype, device=device)

    # The head ends in LogSoftmax, so NLLLoss is the matching criterion.
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.fc.parameters(), lr=parameterization['lr'])

    # Without this loop the learning rate would not influence the objective
    # at all and the optimisation would only see noise.
    model.train()
    for _ in range(int(parameterization.get('max_epoch', 1))):
        for inputs, labels in train_loader:
            inputs = inputs.to(dtype=dtype, device=device)
            labels = labels.to(device=device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

    # Score on the held-out split, not on the data the head was fitted to.
    return evaluate(
        net=model,
        data_loader=valid_loader,
        dtype=dtype,
        device=device,
    )

########################
## Run the optimization

# Let Ax's managed loop search the space below, scoring every candidate with
# train_evaluate and reading its result as the 'accuracy' objective.
best_parameters, values, experiment, model = optimize(
    parameters=[
        dict(name="lr", type="range", bounds=[1e-6, 0.4], log_scale=True),

        # Add further hyperparameters to this list; each one is forwarded to
        # the evaluation function, for example:
        #   {"name": "momentum", "type": "range", "bounds": [0.0, 1.0]}
        #   {"name": "max_epoch", "type": "range", "bounds": [1, 30]}
        #   {"name": "stepsize", "type": "range", "bounds": [20, 40]}
        #   {"name": "batchsize", "type": "range", "bounds": [10, 100]}
    ],
    objective_name='accuracy',
    evaluation_function=train_evaluate,
)

# Report the best configuration, then the two parts of `values`.
print(best_parameters)
means, covariances = values
print(means, covariances, sep="\n")
	#######################################################
	## this code was used in a Jupyter notebook
	## therefore some jupyter specific lines are commented out


	#%matplotlib inline
	#%config InlineBackend.figure_format = 'retina'
	import matplotlib.pyplot as plt
	import numpy as np
	import torch
	from torch import nn
	from torch import optim
	import torch.nn.functional as F
	import torchvision
	from torchvision import datasets, transforms, models

	from ax.plot.contour import plot_contour
	from ax.plot.trace import optimization_trace_single_method
	from ax.service.managed_loop import optimize
	from ax.utils.notebook.plotting import render, init_notebook_plotting
	from ax.utils.tutorials.cnn_utils import load_mnist, train, evaluate

	#init_notebook_plotting()

	# Tensor precision shared by the model and the input batches.
	dtype = torch.float32
	# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
	device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

	##############################################################################
	## The data used here is the Belgian Traffic Sign dataset from
	## https://btsd.ethz.ch/shareddata/ >> BelgiumTS for Classification (cropped images)

	# Location of the training images; machine-specific, adjust before running.
	data_dir = '/home/jzornig/data/BelgiumTS/Training/'


	def load_split_train_test(datadir, valid_size=.2):
		"""Build a training loader and a held-out loader over one image folder.

		Each image is resized to 57x57 and converted to a tensor. The sample
		indices are shuffled with NumPy's global RNG; the first ``valid_size``
		fraction feeds the held-out loader and the remainder the training
		loader.

		Args:
			datadir: Directory passed to ``datasets.ImageFolder``.
			valid_size: Fraction of the samples reserved for the held-out split.

		Returns:
			Tuple ``(trainloader, testloader)``; both use a batch size of 64.
		"""
		from torch.utils.data.sampler import SubsetRandomSampler

		# Same preprocessing for both splits: fixed-size resize, then tensor.
		train_transforms = transforms.Compose([
			transforms.Resize((57, 57)),
			transforms.ToTensor(),
		])
		test_transforms = transforms.Compose([
			transforms.Resize((57, 57)),
			transforms.ToTensor(),
		])
		train_data = datasets.ImageFolder(datadir, transform=train_transforms)
		test_data = datasets.ImageFolder(datadir, transform=test_transforms)

		num_train = len(train_data)
		indices = list(range(num_train))
		split = int(np.floor(valid_size * num_train))
		np.random.shuffle(indices)

		# The head of the shuffled list is held out, the tail is trained on.
		train_idx, test_idx = indices[split:], indices[:split]
		train_sampler = SubsetRandomSampler(train_idx)
		test_sampler = SubsetRandomSampler(test_idx)
		trainloader = torch.utils.data.DataLoader(
			train_data, sampler=train_sampler, batch_size=64)
		testloader = torch.utils.data.DataLoader(
			test_data, sampler=test_sampler, batch_size=64)
		return trainloader, testloader

	# Split the image folder once at module load: 20% held out, the rest for training.
	train_loader, valid_loader = load_split_train_test(data_dir, valid_size=.2)

	#########################################
	## define an optimization target function
	## with the model included

	def train_evaluate(parameterization):
		"""Train a fresh ResNet50 classification head and return its accuracy.

		This is the evaluation function handed to Ax's ``optimize``. A
		pretrained ResNet50 is loaded, its backbone is frozen, and a new head
		is trained on ``train_loader`` with Adam at the proposed learning rate.
		The trained network is then scored on the held-out ``valid_loader``.

		Args:
			parameterization: Dict of hyperparameter values proposed by Ax.
				``lr`` is required. ``max_epoch`` is optional and defaults to
				a single pass over the training data.

		Returns:
			The value produced by Ax's ``evaluate`` helper on ``valid_loader``,
			used as the ``accuracy`` objective.
		"""
		model = models.resnet50(pretrained=True)
		# Freeze the pretrained backbone so only the new head is optimised
		# (the VGG19 variant of this experiment left these weights trainable).
		for param in model.parameters():
			param.requires_grad = False

		model.fc = nn.Sequential(
			nn.Linear(2048, 512),
			nn.ReLU(),
			nn.Dropout(0.2),
			nn.Linear(512, 80),  # 80 is the number of classes/outputs
			nn.LogSoftmax(dim=1),
		)
		# Move the weights explicitly so they sit on the same device as the batches.
		model.to(dtype=dtype, device=device)

		# The head ends in LogSoftmax, so NLLLoss is the matching criterion.
		criterion = nn.NLLLoss()
		optimizer = optim.Adam(model.fc.parameters(), lr=parameterization['lr'])

		# Without this loop the learning rate would not influence the objective
		# at all and the optimisation would only see noise.
		model.train()
		for _ in range(int(parameterization.get('max_epoch', 1))):
			for inputs, labels in train_loader:
				inputs = inputs.to(dtype=dtype, device=device)
				labels = labels.to(device=device)
				optimizer.zero_grad()
				loss = criterion(model(inputs), labels)
				loss.backward()
				optimizer.step()

		# Score on the held-out split, not on the data the head was fitted to.
		return evaluate(
			net=model,
			data_loader=valid_loader,
			dtype=dtype,
			device=device,
		)

	########################
	## Run the optimization

	# Let Ax's managed loop search the space below, scoring every candidate with
	# train_evaluate and reading its result as the 'accuracy' objective.
	best_parameters, values, experiment, model = optimize(
		parameters=[
			dict(name="lr", type="range", bounds=[1e-6, 0.4], log_scale=True),

			# Add further hyperparameters to this list; each one is forwarded to
			# the evaluation function, for example:
			#   {"name": "momentum", "type": "range", "bounds": [0.0, 1.0]}
			#   {"name": "max_epoch", "type": "range", "bounds": [1, 30]}
			#   {"name": "stepsize", "type": "range", "bounds": [20, 40]}
			#   {"name": "batchsize", "type": "range", "bounds": [10, 100]}
		],
		objective_name='accuracy',
		evaluation_function=train_evaluate,
	)

	# Report the best configuration, then the two parts of `values`.
	print(best_parameters)
	means, covariances = values
	print(means, covariances, sep="\n")