-- Training model for Self Driving Car using Torch
-- Origin: GitHub gist sathley/c1a0b34e7fadfb1ca16a0861afb3f20f (created December 22, 2016 03:03).
require 'torch'
require 'nn'
require 'optim'
require 'paths'
require 'sys'
-- Default training options. Any option can be overridden at runtime through an
-- environment variable of the same name, e.g.:
--   $ lr=0.001 th main.lua
-- NOTE(review): `opt` is intentionally left global; the data-loading code pulled in
-- through data/data.lua may read it -- confirm before turning it into a local.
opt = {
   dataset = 'simple',       -- indicates what dataset load to use (in data.lua)
   nThreads = 4,             -- how many threads to pre-fetch data
   batchSize = 200,          -- number of samples per mini-batch
   loadSize = 256,           -- when loading images, resize first to this size
   fineSize = 224,           -- crop this size from the loaded image
   nClasses = 1,             -- number of network outputs (1 => a single regressed value)
   lr = 0.001,               -- learning rate
   lr_decay = 30000,         -- divide the learning rate by 10 every this many iterations (0 disables)
   beta1 = 0.9,              -- momentum term for adam
   meanIter = 0,             -- how many iterations to retrieve for mean estimation
   saveIter = 1000,          -- write check point on this interval (in iterations)
   niter = 50000,            -- number of training iterations (one mini-batch each)
   gpu = 1,                  -- which GPU to use (0 = CPU); consider using CUDA_VISIBLE_DEVICES instead
   cudnn = 1,                -- whether to use cudnn or not
   finetune = '',            -- if set, will load this network instead of starting from scratch
   randomize = 1,            -- whether to shuffle the data file or not
   cropping = 'random',      -- options for data augmentation
   display_port = 8000,      -- port to push graphs
   name = 'regression',      -- the name of the experiment (alternative: paths.basename(paths.thisfile()):sub(1,-5))
   data_root = '/do_not_store/ananth/dataset/train/',
   data_list = '/do_not_store/ananth/dataset/train/train_numbers.txt',
   -- presumably the per-channel image mean used by the data loader -- TODO confirm in data.lua
   mean = {-0.083300798050439,-0.10651495109198,-0.17295466315224},
}

--- Resolve one option from its default and the raw environment string.
-- @param default the built-in default value of the option
-- @param raw the value of the environment variable, or nil when it is unset
-- @return the default when the variable is unset; a list of numbers when the
--         default is a table (parsed from a comma-separated string); otherwise
--         the value converted to a number when possible, else the raw string
local function env_override(default, raw)
   if raw == nil then return default end
   if type(default) == 'table' then
      -- Keep table-valued options (e.g. `mean`) as tables instead of clobbering
      -- them with the raw string: accept "a,b,c".
      local values = {}
      for field in raw:gmatch('[^,%s]+') do
         values[#values + 1] = assert(tonumber(field),
            'expected a comma-separated list of numbers, got: ' .. raw)
      end
      if #values == 0 then return default end
      return values
   end
   return tonumber(raw) or raw
end

-- Parse environment variables to override the defaults. Only existing keys are
-- reassigned, which is safe while traversing with pairs().
for k, v in pairs(opt) do opt[k] = env_override(v, os.getenv(k)) end

-- "<short hostname>:<display port>", used to tag the per-iteration log line.
opt.hostname = sys.execute('hostname -s') .. ':' ..opt.display_port
print(opt)
-- Global Torch configuration: fixed RNG seed, a single CPU thread, and
-- float tensors as the default tensor type.
torch.manualSeed(0)
torch.setnumthreads(1)
torch.setdefaulttensortype('torch.FloatTensor')

-- When a GPU is requested (opt.gpu > 0), load the CUDA backend and select that device.
if opt.gpu > 0 then
   require 'cunn'
   cutorch.setDevice(opt.gpu)
end

-- Build the data loader from data/data.lua, handing it the thread count,
-- the dataset name and the full option table.
local DataLoader = paths.dofile('data/data.lua')
local data = DataLoader.new(opt.nThreads, opt.dataset, opt)
print("Dataset: " .. opt.dataset, " Size: ", data:size())
-- Define the model: either restore a serialized network (opt.finetune set) or
-- build a convolutional network with batch normalization from scratch.
local net
if opt.finetune ~= '' then
   -- load in existing network
   print('loading ' .. opt.finetune)
   net = torch.load(opt.finetune)
else
   net = nn.Sequential()

   -- Append convolution -> batch norm -> in-place ReLU (square kernel, stride and padding).
   local function add_conv(n_in, n_out, kernel, stride, pad)
      net:add(nn.SpatialConvolution(n_in, n_out, kernel, kernel, stride, stride, pad, pad))
      net:add(nn.SpatialBatchNormalization(n_out))
      net:add(nn.ReLU(true))
   end

   -- Append a 3x3 max pooling layer with stride 2.
   local function add_pool()
      net:add(nn.SpatialMaxPooling(3,3,2,2))
   end

   -- Append linear -> batch norm -> ReLU -> dropout(0.5).
   local function add_fc(n_in, n_out)
      net:add(nn.Linear(n_in, n_out))
      net:add(nn.BatchNormalization(n_out))
      net:add(nn.ReLU())
      net:add(nn.Dropout(0.5))
   end

   add_conv(3, 96, 11, 4, 2)     -- 224 -> 55
   add_pool()                    -- 55 ->  27
   add_conv(96, 256, 5, 1, 2)    -- 27 ->  27
   add_pool()                    -- 27 ->  13
   add_conv(256, 384, 3, 1, 1)   -- 13 ->  13
   add_conv(384, 256, 3, 1, 1)   -- 13 ->  13
   add_conv(256, 256, 3, 1, 1)   -- 13 ->  13
   add_pool()                    -- 13 ->  6

   local n_features = 256*6*6
   net:add(nn.View(n_features))
   add_fc(n_features, 4096)
   add_fc(4096, 4096)
   net:add(nn.Linear(4096, opt.nClasses))

   -- Re-initialize a single module: convolutions get N(0, 0.01) weights and zero
   -- bias; batch-norm layers get N(1, 0.02) weights and zero bias when they have
   -- those parameters. Every other module type is left untouched.
   local function weights_init(m)
      local kind = torch.type(m)
      if kind:find('Convolution') then
         m.weight:normal(0.0, 0.01)
         m.bias:fill(0)
         return
      end
      if kind:find('BatchNormalization') then
         if m.weight then m.weight:normal(1.0, 0.02) end
         if m.bias then m.bias:fill(0) end
      end
   end

   net:apply(weights_init) -- loop over all layers, applying weights_init
end
print(net)
-- Loss: mean squared error (the cross-entropy alternative is kept for reference).
--local criterion = nn.CrossEntropyCriterion()
local criterion = nn.MSECriterion()

-- Pre-allocated batch buffers that every iteration copies its data into,
-- plus the most recent loss value (written by the optimization closure).
local input = torch.Tensor(opt.batchSize, 3, opt.fineSize, opt.fineSize)
local label = torch.Tensor(opt.batchSize)
local err

-- Timers to roughly profile performance: whole iteration and data fetching.
local tm = torch.Timer()
local data_tm = torch.Timer()

-- On GPU runs, move buffers, network and criterion to CUDA first and, when
-- requested, convert the network to cudnn afterwards.
if opt.gpu > 0 then
   input = input:cuda()
   label = label:cuda()
   net:cuda()
   criterion:cuda()

   if opt.cudnn > 0 then
      require 'cudnn'
      net = cudnn.convert(net, cudnn)
   end
end

-- Flattened views of all parameters and their gradients; must be obtained
-- after the network has reached its final (possibly converted) form.
local parameters, gradParameters = net:getParameters()

-- Plotting client; `disp` is kept as a global exactly as before.
disp = require 'display'
disp.url = 'http://localhost:' .. opt.display_port .. '/events'
-- Most recent batch as returned by the loader; kept outside the closure so the
-- training loop can display the images.
local data_im, data_label

--- Optimization closure handed to the optimizer.
-- Fetches one batch, runs a forward/backward pass and reports the result.
-- @param x parameter vector supplied by the optimizer (unused here)
-- @return the loss for the batch (also stored in the outer `err`) and the
--         flattened gradient tensor
local function fx(x)
   gradParameters:zero()

   -- fetch data, timing only the loader call
   data_tm:reset()
   data_tm:resume()
   data_im, data_label = data:getBatch()
   data_tm:stop()

   -- copy the batch into the pre-allocated (possibly CUDA) buffers
   input:copy(data_im:squeeze())
   label:copy(data_label)

   -- forward pass, loss, then back-propagate the loss gradient
   local prediction = net:forward(input)
   err = criterion:forward(prediction, label)
   local grad_prediction = criterion:backward(prediction, label)
   net:backward(input, grad_prediction)

   return err, gradParameters
end
-- Loss curve samples, stored as {iteration, err} pairs.
local history = {}

-- Build a fresh optimizer state from the current options. The optimizer adds
-- its own fields (such as momentum) to this table, so the same table must be
-- reused between iterations; it is only recreated on purpose when the learning
-- rate is decayed, which also resets the accumulated momentum.
local function new_optim_state()
   return {
      learningRate = opt.lr,
      beta1 = opt.beta1,
   }
end
local optimState = new_optim_state()

print('Starting Optimization...')

-- train main loop: one optimizer step (one mini-batch) per iteration
for counter = 1, opt.niter do
   collectgarbage() -- necessary sometimes
   tm:reset()

   -- do one iteration
   optim.adam(fx, parameters, optimState)

   -- logging: sample the loss curve every 10 iterations
   if counter % 10 == 1 then
      history[#history + 1] = {counter, err}
      disp.plot(history, {win=1, title=opt.name, labels = {"iteration", "err"}})
   end

   -- every 100 iterations, show the first layer's filters and the current batch
   if counter % 100 == 1 then
      -- local copy (previously leaked as the global `w`)
      local filters = net.modules[1].weight:float():clone()
      for i = 1, filters:size(1) do
         local norm = filters[i]:norm()
         -- skip all-zero filters so the display is not filled with NaNs
         if norm > 0 then filters[i]:mul(1. / norm) end
      end
      disp.image(filters, {win=2, title=(opt.name .. ' conv1')})
      disp.image(data_im, {win=3, title=(opt.name .. ' batch')})
   end

   print(('%s %s Iter: [%7d / %7d] Time: %.3f DataTime: %.3f Err: %.4f'):format(
      opt.name, opt.hostname, counter, opt.niter, tm:time().real, data_tm:time().real,
      err))

   -- save checkpoint
   -- :clearState() compacts the model so it takes less space on disk
   -- Note: the file name is fixed, so each save overwrites the previous one.
   if counter % opt.saveIter == 0 then
      local checkpoint_dir = 'checkpoints/' .. opt.name
      local net_path = checkpoint_dir .. '/net.t7'
      -- report the path that is actually written
      print('Saving ' .. net_path)
      paths.mkdir('checkpoints')
      paths.mkdir(checkpoint_dir)
      torch.save(net_path, net:clearState())
      --torch.save(checkpoint_dir .. '/iter' .. counter .. '_optim.t7', optimState)
      torch.save(checkpoint_dir .. '/history.t7', history)
   end

   -- decay the learning rate, if requested
   if opt.lr_decay > 0 and counter % opt.lr_decay == 0 then
      opt.lr = opt.lr / 10
      print('Decreasing learning rate to ' .. opt.lr)
      -- create new optimState to reset momentum
      optimState = new_optim_state()
   end
end
-- End of script (GitHub page footer removed).