Last active
August 18, 2016 09:43
-
-
Save taey16/e4c2f2317eb386bebea0 to your computer and use it in GitHub Desktop.
convert_caffe
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'loadcaffe' | |
require 'nn' | |
-- Converts the Caffe VGG-16 definition/weights under `model_root` into a
-- Torch network via loadcaffe and returns it from this chunk.
local model_root = '/storage/models/vgg'
local deploy_file = paths.concat(model_root, 'vgg_layer16_deploy.prototxt')
local weight_file = paths.concat(model_root, 'vgg_layer16.caffemodel')
-- loadcaffe's third argument is the backend *name* (a string such as 'nn'),
-- not the `nn` package table, so pass the string.
local model = loadcaffe.load(deploy_file, weight_file, 'nn')
return model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- ImageNet classification with Torch7 demo
require 'torch' | |
require 'loadcaffe' | |
require 'image' | |
require 'nn' | |
-- Helper functions | |
--- Loads the mapping from model outputs to human readable labels.
-- Every line of the synset file contributes one label: the first 10
-- characters of the line are dropped (presumably the synset id plus a
-- separating space -- confirm against the file) and the rest is kept.
-- @param path optional path to the synset file; defaults to the ILSVRC2012
--   location this script has always used
-- @return array of label strings, in file order
-- Raises an error (with the io.open message) if the file cannot be opened.
function load_synset(path)
  path = path or '/storage/ImageNet/ILSVRC2012/synset_words.txt'
  -- assert turns a failed open into a readable error instead of a later
  -- "attempt to index a nil value".
  local file = assert(io.open(path, 'r'))
  local list = {}
  for line in file:lines() do
    list[#list + 1] = string.sub(line, 11)
  end
  file:close()
  return list
end
--- Saves the first `n` slices of `x` as one image file.
-- The slices are taken along dimension 1, passed through
-- image.toDisplayTensor (padding 2, 9 per row, symmetric) and written with
-- image.save.
-- @param x tensor to visualise
-- @param n number of leading slices of `x` to include
-- @param file output path; defaults to "./out.png"
function save_images(x, n, file)
  local target = file or "./out.png"
  local grid = image.toDisplayTensor({
    input = x:narrow(1, 1, n),
    padding = 2,
    nrow = 9,
    symmetric = true,
  })
  image.save(target, grid)
end
--- Builds a 10-view batch from one image: five crops plus their mirrors.
-- The five 224x224 crops are the four corners and the centre; each is then
-- passed through image.hflip to produce views 6-10.
-- @param input 3 x H x W tensor with H >= 224 and W >= 224 (the caller in
--   this file passes a 256x256 image)
-- @return torch.DoubleTensor of size 10 x 3 x 224 x 224; entries 1-5 are the
--   crops (top-left, top-right, bottom-left, bottom-right, centre) and
--   entries 6-10 are the flips of entries 1-5 in the same order
function augment_image(input)
  local oH, oW = 224, 224
  -- Read the source extent from the tensor instead of assuming 256x256.
  local iH, iW = input:size(2), input:size(3)
  assert(iH >= oH and iW >= oW, 'augment_image: input is smaller than the crop size')

  -- image.crop takes a zero-based (x1, y1) and an exclusive (x2, y2), so the
  -- top/left crops must start at 0 (not 1) to actually touch the image edge.
  local x_max, y_max = iW - oW, iH - oH
  local origins = {
    { 0, 0 },                                         -- top-left
    { x_max, 0 },                                     -- top-right
    { 0, y_max },                                     -- bottom-left
    { x_max, y_max },                                 -- bottom-right
    { math.ceil(x_max / 2), math.ceil(y_max / 2) },   -- centre
  }

  local output = torch.DoubleTensor(10, 3, oH, oW)
  for i, origin in ipairs(origins) do
    local x1, y1 = origin[1], origin[2]
    output[{ i, {}, {}, {} }] = image.crop(input, x1, y1, x1 + oW, y1 + oH)
  end
  for i = 1, #origins do
    output[{ i + #origins, {}, {}, {} }] = image.hflip(output[{ i, {}, {}, {} }])
  end
  -- save_images(output, 10)
  return output
end
--- Converts an image from RGB to BGR format and subtracts a per-channel mean.
-- Steps, in order: resize to 256x256 (bilinear) and multiply by 255; force
-- the result to three channels; swap channels 1 and 3; subtract the fixed
-- means 103.939 / 116.779 / 123.68 from channels 1 / 2 / 3 of the result.
-- @param im image tensor, either 2-D or 3-D with 1, 3 or 4 channels in
--   dimension 1
-- @return 3-channel tensor in BGR order with the means subtracted
-- Raises an error for any other tensor layout (after printing its size).
function preprocess(im)
  -- rescale the image
  local pixels = image.scale(im, 256, 256, 'bilinear') * 255
  local ndim = pixels:dim()

  if ndim == 2 then
    -- no channel dimension: add one and replicate it three times
    pixels = pixels:view(1, pixels:size(1), pixels:size(2)):repeatTensor(3, 1, 1)
  elseif ndim == 3 and pixels:size(1) == 1 then
    -- 1-channel image
    pixels = pixels:repeatTensor(3, 1, 1)
  elseif ndim == 3 and pixels:size(1) == 4 then
    -- image with alpha: keep only the first three channels
    pixels = pixels[{ { 1, 3 }, {}, {} }]
  elseif not (ndim == 3 and pixels:size(1) == 3) then
    -- anything that is not already a 3-channel image is rejected
    print(#pixels)
    error('not 2-channel or 3-channel image')
  end

  -- RGB2BGR: work on a copy so the swap reads unmodified source channels
  local bgr = pixels:clone()
  bgr[1]:copy(pixels[3])
  bgr[3]:copy(pixels[1])

  -- subtract the per-channel mean (listed in the BGR order of `bgr`)
  local channel_mean = { 103.939, 116.779, 123.68 }
  for c = 1, 3 do
    bgr[c]:add(-channel_mean[c])
  end
  return bgr
end
-- Setting up the network.
-- These names are intentionally left global so anything that inspected them
-- after running the script keeps working.
proto_name = '/storage/models/vgg/vgg_layer16_deploy.prototxt'
model_name = '/storage/models/vgg/vgg_layer16.caffemodel'
backend = 'nn'
print '==> Loading modelwork'
model = loadcaffe.load(proto_name, model_name, backend)
-- drop the last module of the converted network and append a fresh nn.SoftMax
model.modules[#model.modules] = nil
model:add(nn.SoftMax())
print(model)
-- as we want to classify, let's disable dropouts by enabling evaluation mode
model:evaluate()
print '==> Loading synsets'
synset_words = load_synset()

-- Read the validation list: one "<image file> <label>" pair per line.
local dataset_root = '/storage/ImageNet/ILSVRC2012/val'
local image_list = {}
local label_list = {}
-- assert gives a readable error if the list is missing; the handle is closed
-- as soon as the list has been read.
local list_file = assert(io.open('/storage/ImageNet/ILSVRC2012/val_synset.txt', 'r'))
for line in list_file:lines() do
  local item = string.split(line, ' ')
  -- Only accept complete pairs so image_list and label_list stay aligned.
  if item[1] and item[2] then
    image_list[#image_list + 1] = item[1]
    label_list[#label_list + 1] = item[2]
  end
end
list_file:close()

torch.setnumthreads(4)
local top1 = 0
local top5 = 0
local trials = 0
for k, fname in ipairs(image_list) do
  print(fname .. ' ' .. label_list[k])
  local im = image.load(paths.concat(dataset_root, fname))
  local label = tonumber(label_list[k])
  if not label then
    error('non-numeric label "' .. label_list[k] .. '" for ' .. fname)
  end
  -- labels in the list are shifted by one to match the 1-based class indices
  label = label + 1

  -- Have to resize and convert from RGB to BGR and subtract mean,
  -- then expand into the 10-view batch
  local input = augment_image(preprocess(im))

  -- Average the scores over the views and rank classes in decreasing order
  local scores = model:forward(input):float()
  local _, classes = torch.mean(scores, 1):view(-1):sort(true)

  trials = trials + 1
  top1 = top1 + classes[{ { 1, 1 } }]:eq(label):sum()
  top5 = top5 + classes[{ { 1, 5 } }]:eq(label):sum()
  print(("%d top1: %d/%d = %.5f, top5: %d/%d = %.5f"):format(
    k, top1, trials, top1 / trials * 100, top5, trials, top5 / trials * 100))
  io.flush()
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment