taey16/convert_vgg.lua

## convert_vgg.lua
require 'loadcaffe'
require 'nn'

-- Loads the Caffe VGG definition (vgg_layer16_deploy.prototxt) and its
-- weights (vgg_layer16.caffemodel) through loadcaffe and returns the
-- resulting model to whoever runs this chunk.
local model_root = '/storage/models/vgg'
local deploy_file = paths.concat(model_root, 'vgg_layer16_deploy.prototxt')
local weight_file = paths.concat(model_root, 'vgg_layer16.caffemodel')
-- The backend is selected by name, so pass the string 'nn' rather than the
-- `nn` module table.
local model = loadcaffe.load(deploy_file, weight_file, 'nn')

return model

## predict_ilsvrc12.lua
-- ImageNet classification with Torch7 demo
require 'torch'
require 'loadcaffe'
require 'image'
require 'nn'

-- Helper functions

-- Loads the mapping from model outputs to human readable labels.
--
-- @param path (optional) synset file to read; defaults to
--   '/storage/ImageNet/ILSVRC2012/synset_words.txt'.
-- @return array of strings, one per line of the file in file order. Each
--   entry is the line with its first 10 characters dropped.
-- Raises an error (with the io.open message) if the file cannot be opened.
function load_synset(path)
  path = path or '/storage/ImageNet/ILSVRC2012/synset_words.txt'
  -- assert surfaces a missing/unreadable file instead of failing later on a nil handle
  local file = assert(io.open(path, 'r'))
  local list = {}
  for line in file:lines() do
    list[#list + 1] = string.sub(line, 11)
  end
  file:close()
  return list
end

-- Tiles the first n slices of x (taken along dimension 1) into a single
-- display tensor and writes it to disk.
--   x    : tensor holding the images
--   n    : how many leading slices to include
--   file : output path; "./out.png" when omitted
function save_images(x, n, file)
  local target = file or "./out.png"
  local grid = image.toDisplayTensor({
    input = x:narrow(1, 1, n),
    padding = 2,
    nrow = 9,
    symmetric = true,
  })
  image.save(target, grid)
end

-- Builds the 10-crop batch for one image: four corner crops, the centre
-- crop, and the horizontal flip of each.
--
-- @param input image tensor; the crop coordinates assume it is 256 x 256
--   (the size produced by preprocess) -- TODO confirm for other callers.
-- @return torch.DoubleTensor of size 10 x 3 x 224 x 224. Slices 1-5 are the
--   top-left, top-right, bottom-left, bottom-right and centre crops;
--   slices 6-10 are their horizontal flips, in the same order.
function augment_image(input)
  local oH, oW = 224, 224
  local iH, iW = 256, 256
  local x_max = iW - oW
  local y_max = iH - oH
  local x_mid = math.ceil(x_max / 2)
  local y_mid = math.ceil(y_max / 2)

  -- image.crop takes zero-based (x1, y1) and an exclusive (x2, y2), so the
  -- top/left crops must start at 0 to reach the image border, just as the
  -- right/bottom crops end exactly at iW / iH.
  local offsets = {
    {0,     0},
    {x_max, 0},
    {0,     y_max},
    {x_max, y_max},
    {x_mid, y_mid},
  }

  local output = torch.DoubleTensor(10, 3, oH, oW)
  for i, offset in ipairs(offsets) do
    local x, y = offset[1], offset[2]
    output[i]:copy(image.crop(input, x, y, x + oW, y + oH))
    output[i + 5]:copy(image.hflip(output[i]))
  end
  -- save_images(output, 10)
  return output
end

-- Prepares a loaded image for the Caffe-converted model.
-- Steps: bilinear rescale to 256x256 and multiply by 255, force exactly
-- three channels, swap channels 1 and 3 (RGB -> BGR), then subtract a fixed
-- per-channel mean.
--   im : 2-D image, or 3-D image with 1, 3 or 4 channels in dimension 1
-- Returns a new 3 x 256 x 256 tensor; raises an error for any other layout.
function preprocess(im)
  local scaled = image.scale(im, 256, 256, 'bilinear') * 255
  local ndim = scaled:dim()
  local channels = (ndim == 3) and scaled:size(1) or nil

  local rgb
  if ndim == 2 then
    -- plain 2-D image: add a channel dimension, then replicate it
    rgb = scaled:view(1, scaled:size(1), scaled:size(2)):repeatTensor(3, 1, 1)
  elseif channels == 1 then
    -- 1-channel image
    rgb = scaled:repeatTensor(3, 1, 1)
  elseif channels == 3 then
    -- 3-channel image, used as is
    rgb = scaled
  elseif channels == 4 then
    -- image with alpha: keep only the first three channels
    rgb = scaled[{{1, 3}, {}, {}}]
  else
    print(#scaled)
    error('not 2-channel or 3-channel image')
  end

  -- RGB2BGR: exchange the first and third channel on a copy
  local bgr = rgb:clone()
  bgr[1]:copy(rgb[3])
  bgr[3]:copy(rgb[1])

  -- subtract the per-channel mean (channel order after the swap)
  local channel_mean = {103.939, 116.779, 123.68}
  for c = 1, 3 do
    bgr[c]:add(-channel_mean[c])
  end

  return bgr
end

-- Model definition, weights and loadcaffe backend used for evaluation
backend = 'nn'
proto_name = '/storage/models/vgg/vgg_layer16_deploy.prototxt'
model_name = '/storage/models/vgg/vgg_layer16.caffemodel'

print('==> Loading modelwork')
model = loadcaffe.load(proto_name, model_name, backend)
-- drop the last module of the loaded model and append an nn.SoftMax in its place
table.remove(model.modules)
model:add(nn.SoftMax())
print(model)

-- evaluation mode, so that dropout is disabled while classifying
model:evaluate()

print('==> Loading synsets')
synset_words = load_synset()

-- Reads the validation list: each line holds an image file name and its
-- label separated by a single space. The two values go into the parallel
-- arrays image_list and label_list (labels stay strings here).
local dataset_root = '/storage/ImageNet/ILSVRC2012/val'
-- assert reports a missing list file right away instead of a nil-index error later
local file = assert(io.open('/storage/ImageNet/ILSVRC2012/val_synset.txt', 'r'))
local image_list = {}
local label_list = {}

for line in file:lines() do
  local item = string.split(line, ' ')
  -- skip blank or incomplete lines so the two lists cannot get out of step
  if item[1] and item[2] then
    image_list[#image_list + 1] = item[1]
    label_list[#label_list + 1] = item[2]
  end
end
file:close()

-- Evaluation loop: classifies every listed image with 10-crop averaging and
-- prints the running top-1 / top-5 hit counts and percentages.
torch.setnumthreads(4)

local top1 = 0
local top5 = 0
local trials = 0
for k, fname in ipairs(image_list) do
  print(fname .. ' ' .. label_list[k])
  -- per-image values are locals so they do not leak into the global table
  local filename = paths.concat(dataset_root, fname)
  local im = image.load(filename)
  -- presumably the list stores 0-based labels and +1 maps them onto the
  -- 1-based class indices returned by sort -- TODO confirm
  local label = tonumber(label_list[k]) + 1

  -- Have to resize and convert from RGB to BGR and subtract mean,
  -- then expand the image into its 10 crops
  local input = augment_image(preprocess(im))
  local scores = model:forward(input):float()
  -- average the scores over the crops, then sort in decreasing order
  local _, classes = torch.mean(scores, 1):view(-1):sort(true)

  -- Single-pass alternative without crop averaging:
  --   _, classes = model:forward(input):view(-1):float():sort(true)

  trials = trials + 1
  top1 = top1 + classes[{{1,1}}]:eq(label):sum()
  top5 = top5 + classes[{{1,5}}]:eq(label):sum()
  print(("%d top1: %d/%d = %.5f, top5: %d/%d = %.5f"):format(
    k, top1, trials, top1 / trials * 100, top5, trials, top5 / trials * 100))
  io.flush()

end
	require 'loadcaffe'
	require 'nn'

	-- Loads the Caffe VGG definition (vgg_layer16_deploy.prototxt) and its
	-- weights (vgg_layer16.caffemodel) through loadcaffe and returns the
	-- resulting model to whoever runs this chunk.
	local model_root = '/storage/models/vgg'
	local deploy_file = paths.concat(model_root, 'vgg_layer16_deploy.prototxt')
	local weight_file = paths.concat(model_root, 'vgg_layer16.caffemodel')
	-- The backend is selected by name, so pass the string 'nn' rather than the
	-- `nn` module table.
	local model = loadcaffe.load(deploy_file, weight_file, 'nn')

	return model
	-- ImageNet classification with Torch7 demo
	require 'torch'
	require 'loadcaffe'
	require 'image'
	require 'nn'

	-- Helper functions

	-- Loads the mapping from model outputs to human readable labels.
	--
	-- @param path (optional) synset file to read; defaults to
	--   '/storage/ImageNet/ILSVRC2012/synset_words.txt'.
	-- @return array of strings, one per line of the file in file order. Each
	--   entry is the line with its first 10 characters dropped.
	-- Raises an error (with the io.open message) if the file cannot be opened.
	function load_synset(path)
	  path = path or '/storage/ImageNet/ILSVRC2012/synset_words.txt'
	  -- assert surfaces a missing/unreadable file instead of failing later on a nil handle
	  local file = assert(io.open(path, 'r'))
	  local list = {}
	  for line in file:lines() do
	    list[#list + 1] = string.sub(line, 11)
	  end
	  file:close()
	  return list
	end

	-- Tiles the first n slices of x (taken along dimension 1) into a single
	-- display tensor and writes it to disk.
	--   x    : tensor holding the images
	--   n    : how many leading slices to include
	--   file : output path; "./out.png" when omitted
	function save_images(x, n, file)
	  local target = file or "./out.png"
	  local grid = image.toDisplayTensor({
	    input = x:narrow(1, 1, n),
	    padding = 2,
	    nrow = 9,
	    symmetric = true,
	  })
	  image.save(target, grid)
	end

	-- Builds the 10-crop batch for one image: four corner crops, the centre
	-- crop, and the horizontal flip of each.
	--
	-- @param input image tensor; the crop coordinates assume it is 256 x 256
	--   (the size produced by preprocess) -- TODO confirm for other callers.
	-- @return torch.DoubleTensor of size 10 x 3 x 224 x 224. Slices 1-5 are the
	--   top-left, top-right, bottom-left, bottom-right and centre crops;
	--   slices 6-10 are their horizontal flips, in the same order.
	function augment_image(input)
	  local oH, oW = 224, 224
	  local iH, iW = 256, 256
	  local x_max = iW - oW
	  local y_max = iH - oH
	  local x_mid = math.ceil(x_max / 2)
	  local y_mid = math.ceil(y_max / 2)

	  -- image.crop takes zero-based (x1, y1) and an exclusive (x2, y2), so the
	  -- top/left crops must start at 0 to reach the image border, just as the
	  -- right/bottom crops end exactly at iW / iH.
	  local offsets = {
	    {0,     0},
	    {x_max, 0},
	    {0,     y_max},
	    {x_max, y_max},
	    {x_mid, y_mid},
	  }

	  local output = torch.DoubleTensor(10, 3, oH, oW)
	  for i, offset in ipairs(offsets) do
	    local x, y = offset[1], offset[2]
	    output[i]:copy(image.crop(input, x, y, x + oW, y + oH))
	    output[i + 5]:copy(image.hflip(output[i]))
	  end
	  -- save_images(output, 10)
	  return output
	end

	-- Prepares a loaded image for the Caffe-converted model.
	-- Steps: bilinear rescale to 256x256 and multiply by 255, force exactly
	-- three channels, swap channels 1 and 3 (RGB -> BGR), then subtract a fixed
	-- per-channel mean.
	--   im : 2-D image, or 3-D image with 1, 3 or 4 channels in dimension 1
	-- Returns a new 3 x 256 x 256 tensor; raises an error for any other layout.
	function preprocess(im)
	  local scaled = image.scale(im, 256, 256, 'bilinear') * 255
	  local ndim = scaled:dim()
	  local channels = (ndim == 3) and scaled:size(1) or nil

	  local rgb
	  if ndim == 2 then
	    -- plain 2-D image: add a channel dimension, then replicate it
	    rgb = scaled:view(1, scaled:size(1), scaled:size(2)):repeatTensor(3, 1, 1)
	  elseif channels == 1 then
	    -- 1-channel image
	    rgb = scaled:repeatTensor(3, 1, 1)
	  elseif channels == 3 then
	    -- 3-channel image, used as is
	    rgb = scaled
	  elseif channels == 4 then
	    -- image with alpha: keep only the first three channels
	    rgb = scaled[{{1, 3}, {}, {}}]
	  else
	    print(#scaled)
	    error('not 2-channel or 3-channel image')
	  end

	  -- RGB2BGR: exchange the first and third channel on a copy
	  local bgr = rgb:clone()
	  bgr[1]:copy(rgb[3])
	  bgr[3]:copy(rgb[1])

	  -- subtract the per-channel mean (channel order after the swap)
	  local channel_mean = {103.939, 116.779, 123.68}
	  for c = 1, 3 do
	    bgr[c]:add(-channel_mean[c])
	  end

	  return bgr
	end

	-- Model definition, weights and loadcaffe backend used for evaluation
	backend = 'nn'
	proto_name = '/storage/models/vgg/vgg_layer16_deploy.prototxt'
	model_name = '/storage/models/vgg/vgg_layer16.caffemodel'

	print('==> Loading modelwork')
	model = loadcaffe.load(proto_name, model_name, backend)
	-- drop the last module of the loaded model and append an nn.SoftMax in its place
	table.remove(model.modules)
	model:add(nn.SoftMax())
	print(model)

	-- evaluation mode, so that dropout is disabled while classifying
	model:evaluate()

	print('==> Loading synsets')
	synset_words = load_synset()

	-- Reads the validation list: each line holds an image file name and its
	-- label separated by a single space. The two values go into the parallel
	-- arrays image_list and label_list (labels stay strings here).
	local dataset_root = '/storage/ImageNet/ILSVRC2012/val'
	-- assert reports a missing list file right away instead of a nil-index error later
	local file = assert(io.open('/storage/ImageNet/ILSVRC2012/val_synset.txt', 'r'))
	local image_list = {}
	local label_list = {}

	for line in file:lines() do
	  local item = string.split(line, ' ')
	  -- skip blank or incomplete lines so the two lists cannot get out of step
	  if item[1] and item[2] then
	    image_list[#image_list + 1] = item[1]
	    label_list[#label_list + 1] = item[2]
	  end
	end
	file:close()

	-- Evaluation loop: classifies every listed image with 10-crop averaging and
	-- prints the running top-1 / top-5 hit counts and percentages.
	torch.setnumthreads(4)

	local top1 = 0
	local top5 = 0
	local trials = 0
	for k, fname in ipairs(image_list) do
	  print(fname .. ' ' .. label_list[k])
	  -- per-image values are locals so they do not leak into the global table
	  local filename = paths.concat(dataset_root, fname)
	  local im = image.load(filename)
	  -- presumably the list stores 0-based labels and +1 maps them onto the
	  -- 1-based class indices returned by sort -- TODO confirm
	  local label = tonumber(label_list[k]) + 1

	  -- Have to resize and convert from RGB to BGR and subtract mean,
	  -- then expand the image into its 10 crops
	  local input = augment_image(preprocess(im))
	  local scores = model:forward(input):float()
	  -- average the scores over the crops, then sort in decreasing order
	  local _, classes = torch.mean(scores, 1):view(-1):sort(true)

	  -- Single-pass alternative without crop averaging:
	  --   _, classes = model:forward(input):view(-1):float():sort(true)

	  trials = trials + 1
	  top1 = top1 + classes[{{1,1}}]:eq(label):sum()
	  top5 = top5 + classes[{{1,5}}]:eq(label):sum()
	  print(("%d top1: %d/%d = %.5f, top5: %d/%d = %.5f"):format(
	    k, top1, trials, top1 / trials * 100, top5, trials, top5 / trials * 100))
	  io.flush()

	end