htoyryla/neural_mirage5.lua

## neural_mirage5.lua
--[[
Neural-mirage 5
by @htoyryla 3/2016, 3/2017

based on neural-style by @jcjohnson

for more info see
http://liipetti.net/erratic/2016/03/31/i-have-seen-a-neural-mirage/
http://liipetti.net/erratic/2016/04/20/getting-the-space-back/


changes and new options vs neural-style:

possible to use fc layers to generate content
use -fc_weight to control how much

relu layers can be used to control spatial arrangement,
use -content_weight to control how much

by default content_image is used as style image, too

-label_file:
path to a file containing the labels, used to print what the model is seeing in the image

]]


require 'torch'
require 'nn'
require 'image'
require 'optim'

require 'loadcaffe'

--------------------------------------------------------------------------------

local cmd = torch.CmdLine()

-- Every option carries a help string so that the generated usage text
-- explains what the option controls.

-- Basic options
cmd:option('-style_image', 'none',
           'Style target image(s), comma-separated; "none" uses the content image')
cmd:option('-style_blend_weights', 'nil',
           'Comma-separated blend weights, one per style image; "nil" weights them equally')
cmd:option('-content_image', 'examples/inputs/tubingen.jpg',
           'Content target image')
cmd:option('-image_size', 512, 'Maximum height / width of generated image')
cmd:option('-gpu', 0, 'Zero-indexed ID of the GPU to use; for CPU mode set -gpu = -1')

-- Optimization options
cmd:option('-content_weight', 1.5, 'Weight of content losses on non-fc layers')
cmd:option('-style_weight', 1e4, 'Weight of the style losses')
cmd:option('-fc_weight', 1e6, 'Weight of content losses on layers whose name starts with "fc"')
cmd:option('-tv_weight', 1e-3, 'Weight of total-variation regularization; values <= 0 disable it')
cmd:option('-num_iterations', 2000, 'Number of optimization iterations')
cmd:option('-normalize_gradients', false, 'L1-normalize the gradient of each content / style loss')
cmd:option('-init', 'random', 'random|image')
cmd:option('-optimizer', 'lbfgs', 'lbfgs|adam')
cmd:option('-learning_rate', 0.1, 'Learning rate (used by adam only)')

-- Output options
cmd:option('-print_iter', 50, 'Print losses every this many iterations; 0 disables printing')
cmd:option('-save_iter', 50, 'Save an intermediate image every this many iterations; 0 disables')
cmd:option('-output_image', 'mirage-out.png', 'Path of the final output image')

-- Other options
cmd:option('-style_scale', 1.0, 'Factor applied to -image_size when scaling style images')
cmd:option('-pooling', 'max', 'max|avg')
cmd:option('-proto_file', 'models/vgg16_places_deploy_10.prototxt',
           'Caffe prototxt describing the network')
cmd:option('-model_file', 'models/vgg16_places_snapshot_iter_765280.caffemodel',
           'Caffe model file with the network weights')
cmd:option('-label_file', 'models/places205_categories.txt',
           'Text file with one class label per line; ignored if it cannot be opened')
cmd:option('-backend', 'nn', 'nn|cudnn|clnn')
cmd:option('-cudnn_autotune', false, 'Enable cudnn.benchmark (cudnn backend only)')
cmd:option('-seed', -1, 'Random seed; negative values leave the generator unseeded')

cmd:option('-content_layers', 'relu4_1,fc8', 'layers for content')
cmd:option('-style_layers', 'relu2_1,relu3_1,relu4_1', 'layers for style')

-- Class labels read from -label_file; filled in by main. Deliberately left
-- global, as in the original script.
labels = {}

--- Build the loss-instrumented network and optimize an image against it.
-- Loads the Caffe model, inserts TV / content / style loss modules at the
-- requested layers, appends a SoftMax so that class probabilities can be
-- printed, then runs L-BFGS or ADAM on the image while printing and saving
-- intermediate results.
-- @param params table of options as returned by cmd:parse
local function main(params)
  print("Neural-mirage")

  -- Read the class labels, one per line. A label file that cannot be opened
  -- is not an error: printing of class probabilities is simply skipped.
  local label_file = io.open(params.label_file)
  local have_labels = label_file ~= nil
  if label_file then
    for line in label_file:lines() do
      table.insert(labels, line)
    end
    label_file:close()
  end

  local use_gpu = params.gpu >= 0
  if use_gpu then
    if params.backend ~= 'clnn' then
      require 'cutorch'
      require 'cunn'
      cutorch.setDevice(params.gpu + 1)
    else
      require 'clnn'
      require 'cltorch'
      cltorch.setDevice(params.gpu + 1)
    end
  else
    params.backend = 'nn'
  end
  local use_opencl = use_gpu and params.backend == 'clnn'

  -- Move a tensor or module to the selected device (no-op in CPU mode).
  -- Always use the return value: for tensors the conversion yields a new
  -- object rather than changing the receiver.
  local function to_backend(obj)
    if not use_gpu then
      return obj
    end
    if use_opencl then
      return obj:cl()
    end
    return obj:cuda()
  end

  -- Print every label whose probability in `p` exceeds 0.03. The loop is
  -- bounded by the size of `p` so a label file longer than the network
  -- output cannot index past the end of the tensor.
  local function print_seen(p)
    local count = math.min(#labels, p:size(1))
    for i = 1, count do
      if p[i] > 0.03 then
        print(string.format("%.4f   %s", p[i], labels[i]))
      end
    end
  end

  if params.backend == 'cudnn' then
    require 'cudnn'
    if params.cudnn_autotune then
      cudnn.benchmark = true
    end
    cudnn.SpatialConvolution.accGradParameters = nn.SpatialConvolutionMM.accGradParameters -- ie: nop
  end

  local loadcaffe_backend = params.backend
  if params.backend == 'clnn' then loadcaffe_backend = 'nn' end
  local cnn = loadcaffe.load(params.proto_file, params.model_file, loadcaffe_backend):float()
  -- For CUDA the layers are moved one at a time as they are added to `net`
  -- below; only the OpenCL path converts the whole model up front.
  if use_opencl then
    cnn:cl()
  end

  local content_image = image.load(params.content_image, 3)
  content_image = image.scale(content_image, params.image_size, 'bilinear')
  local content_image_caffe = preprocess(content_image):float()

  -- Without an explicit style image the content image doubles as style.
  if params.style_image == "none" then
    params.style_image = params.content_image
  end

  local style_size = params.image_size
  local style_image_list = params.style_image:split(',')
  local style_images_caffe = {}
  for _, img_path in ipairs(style_image_list) do
    local style_img = image.load(img_path, 3)
    style_img = image.scale(style_img, style_size * params.style_scale, 'bilinear')
    table.insert(style_images_caffe, preprocess(style_img):float())
  end

  -- Handle style blending weights for multiple style inputs
  local style_blend_weights = nil
  if params.style_blend_weights == 'nil' then
    -- Style blending not specified, so use equal weighting
    style_blend_weights = {}
    for i = 1, #style_image_list do
      table.insert(style_blend_weights, 1.0)
    end
  else
    style_blend_weights = params.style_blend_weights:split(',')
    assert(#style_blend_weights == #style_image_list,
      '-style_blend_weights and -style_image must have the same number of elements')
  end
  -- Normalize the style blending weights so they sum to 1
  local style_blend_sum = 0
  for i = 1, #style_blend_weights do
    local weight = tonumber(style_blend_weights[i])
    assert(weight, '-style_blend_weights must be a comma-separated list of numbers')
    style_blend_weights[i] = weight
    style_blend_sum = style_blend_sum + weight
  end
  for i = 1, #style_blend_weights do
    style_blend_weights[i] = style_blend_weights[i] / style_blend_sum
  end

  content_image_caffe = to_backend(content_image_caffe)
  for i = 1, #style_images_caffe do
    style_images_caffe[i] = to_backend(style_images_caffe[i])
  end

  local content_layers = params.content_layers:split(",")
  local style_layers = params.style_layers:split(",")

  -- Set up the network, inserting style and content loss modules
  local content_losses, style_losses = {}, {}
  local next_content_idx, next_style_idx = 1, 1
  local net = nn.Sequential()
  if params.tv_weight > 0 then
    net:add(to_backend(nn.TVLoss(params.tv_weight):float()))
  end
  for i = 1, #cnn do
    local layer = cnn:get(i)
    local name = layer.name
    -- Declared at loop scope so that both branches below can test it.
    local layer_type = torch.type(layer)

    -- A 7x7 adaptive pooling goes in front of every nn.View; presumably this
    -- keeps the input of the fc layers at a fixed size for any image size.
    if layer_type == "nn.View" then
      net:add(to_backend(nn.SpatialAdaptiveMaxPooling(7, 7):float()))
    end

    if next_content_idx <= #content_layers or next_style_idx <= #style_layers then
      local is_pooling = (layer_type == 'cudnn.SpatialMaxPooling' or layer_type == 'nn.SpatialMaxPooling')
      if is_pooling and params.pooling == 'avg' then
        assert(layer.padW == 0 and layer.padH == 0)
        local kW, kH = layer.kW, layer.kH
        local dW, dH = layer.dW, layer.dH
        local avg_pool_layer = to_backend(nn.SpatialAveragePooling(kW, kH, dW, dH):float())
        local msg = 'Replacing max pooling at layer %d with average pooling'
        print(string.format(msg, i))
        net:add(avg_pool_layer)
      elseif layer_type ~= "nn.Dropout" then
        net:add(to_backend(layer))
      end

      -- The explicit nil checks keep a nameless layer from matching an
      -- exhausted layer list (nil == nil).
      if name ~= nil and name == content_layers[next_content_idx] then
        local target = net:forward(content_image_caffe):clone()
        local norm = params.normalize_gradients
        local cweight = params.content_weight
        if string.find(name, "fc") == 1 then  -- this is an fc layer
          cweight = params.fc_weight
        end
        print("Setting up content layer", i, ":", name, cweight)
        local loss_module = to_backend(nn.ContentLoss(cweight, target, norm):float())
        net:add(loss_module)
        table.insert(content_losses, loss_module)
        next_content_idx = next_content_idx + 1
      end

      if name ~= nil and name == style_layers[next_style_idx] then
        print("Setting up style layer  ", i, ":", name)
        local gram = to_backend(GramMatrix():float())
        -- The target is the blend-weighted sum of the normalized Gram
        -- matrices of all style images at this layer.
        local target = nil
        for j = 1, #style_images_caffe do
          local target_features = net:forward(style_images_caffe[j]):clone()
          local target_j = gram:forward(target_features):clone()
          target_j:div(target_features:nElement())
          target_j:mul(style_blend_weights[j])
          if j == 1 then
            target = target_j
          else
            target:add(target_j)
          end
        end
        local norm = params.normalize_gradients
        local loss_module = to_backend(nn.StyleLoss(params.style_weight, target, norm):float())
        net:add(loss_module)
        table.insert(style_losses, loss_module)
        next_style_idx = next_style_idx + 1
      end
    elseif layer_type ~= "nn.Dropout" then
      -- All loss layers are in place; keep the remaining layers so the net
      -- still reaches the classifier output, but leave Dropout out.
      net:add(to_backend(layer))
    end
  end

  --vgg16 places lacks softmax at the end, so insert one
  net:add(to_backend(nn.SoftMax():float()))

  -- We don't need the base CNN anymore, so clean it up to save memory.
  cnn = nil
  for i = 1, #net.modules do
    local module = net.modules[i]
    if torch.type(module) == 'nn.SpatialConvolutionMM' then
      -- remove these, not used, but uses gpu memory
      module.gradWeight = nil
      module.gradBias = nil
    end
  end
  collectgarbage()

  -- Initialize the image
  if params.seed >= 0 then
    torch.manualSeed(params.seed)
  end
  local img = nil
  if params.init == 'random' then
    img = torch.randn(content_image:size()):float():mul(0.001)
  elseif params.init == 'image' then
    img = content_image_caffe:float():clone()
  else
    error('Invalid init type')
  end
  img = to_backend(img)

  -- Try the network with the content image to see which classes it detects.
  if have_labels then
    local cimg = to_backend(content_image_caffe:clone():float())
    local p = net:forward(cimg)
    print("----------- seeing the features --------------")
    print_seen(p)
  end

  -- Run it through the network once to get the proper size for the gradient
  -- All the gradients will come from the extra loss modules, so we just pass
  -- zeros into the top of the net on the backward pass.
  local y = net:forward(img)
  local dy = img.new(#y):zero()

  local optim_state = nil
  if params.optimizer == 'lbfgs' then
    optim_state = {
      maxIter = params.num_iterations,
      verbose = true,
    }
  elseif params.optimizer == 'adam' then
    optim_state = {
      learningRate = params.learning_rate,
    }
  else
    error(string.format('Unrecognized optimizer "%s"', params.optimizer))
  end

  -- Print the individual and total losses (and the detected classes when
  -- labels are available) every -print_iter evaluations.
  local function maybe_print(t, loss, p)
    local verbose = (params.print_iter > 0 and t % params.print_iter == 0)
    if not verbose then
      return
    end
    print(string.format('Iteration %d / %d', t, params.num_iterations))
    for i, loss_module in ipairs(content_losses) do
      print(string.format('  Content %d loss: %f', i, loss_module.loss))
    end
    for i, loss_module in ipairs(style_losses) do
      print(string.format('  Style %d loss: %f', i, loss_module.loss))
    end
    print(string.format('  Total loss: %f', loss))
    if have_labels then
      print_seen(p)
    end
  end

  -- Save the current image every -save_iter evaluations under a numbered
  -- name, and under -output_image itself at evaluation -num_iterations.
  local function maybe_save(t)
    local is_last = (t == params.num_iterations)
    local should_save = (params.save_iter > 0 and t % params.save_iter == 0) or is_last
    if not should_save then
      return
    end
    local disp = deprocess(img:double())
    disp = image.minmax{tensor=disp, min=0, max=1}
    local filename = build_filename(params.output_image, t)
    if is_last then
      filename = params.output_image
    end
    image.save(filename, disp)
  end

  -- Function to evaluate loss and gradient. We run the net forward and
  -- backward to get the gradient, and sum up losses from the loss modules.
  -- optim.lbfgs internally handles iteration and calls this function many
  -- times, so we manually count the number of iterations to handle printing
  -- and saving intermediate results.
  local num_calls = 0
  local function feval(x)
    num_calls = num_calls + 1
    local p = net:forward(x)

    local grad = net:updateGradInput(x, dy)
    local loss = 0
    for _, mod in ipairs(content_losses) do
      loss = loss + mod.loss
    end
    for _, mod in ipairs(style_losses) do
      loss = loss + mod.loss
    end
    maybe_print(num_calls, loss, p)
    maybe_save(num_calls)

    collectgarbage()
    -- optim.lbfgs expects a vector for gradients
    return loss, grad:view(grad:nElement())
  end

  -- Run optimization.
  if params.optimizer == 'lbfgs' then
    print('Running optimization with L-BFGS')
    optim.lbfgs(feval, img, optim_state)
  elseif params.optimizer == 'adam' then
    print('Running optimization with ADAM')
    for _ = 1, params.num_iterations do
      optim.adam(feval, img, optim_state)
    end
  end
end


--- Derive the file name of an intermediate result.
-- The iteration number is inserted between the base name and the extension
-- of `output_image`, and the directory part is kept in front.
-- @param output_image path of the final output image
-- @param iteration    iteration number to embed in the name
-- @return path of the form "<dir>/<base>_<iteration>.<ext>"
function build_filename(output_image, iteration)
  local extension = paths.extname(output_image)
  local stem = paths.basename(output_image, extension)
  local folder = paths.dirname(output_image)
  local numbered = string.format('%s_%d.%s', stem, iteration, extension)
  return string.format('%s/%s', folder, numbered)
end


-- Prepare an image for a Caffe model: reverse the order of the three
-- channels (RGB to BGR), multiply by 256.0 and subtract the per-channel mean
-- pixel. The argument itself is left untouched; a new tensor is returned.
function preprocess(img)
  local channel_order = torch.LongTensor{3, 2, 1}
  local bgr = img:index(1, channel_order)
  bgr:mul(256.0)
  local mean = torch.DoubleTensor({103.939, 116.779, 123.68}):view(3, 1, 1)
  bgr:add(-1, mean:expandAs(bgr))
  return bgr
end


-- Inverse of preprocess: add the per-channel mean pixel back, reverse the
-- channel order again and divide by 256.0. Returns a new tensor.
function deprocess(img)
  local mean = torch.DoubleTensor({103.939, 116.779, 123.68}):view(3, 1, 1)
  local shifted = img + mean:expandAs(img)
  local channel_order = torch.LongTensor{3, 2, 1}
  return shifted:index(1, channel_order):div(256.0)
end


-- Define an nn Module to compute content loss in-place.
-- The module is transparent on the forward pass (its output is its input)
-- and records the weighted MSE between the input and a fixed target in
-- self.loss. On the backward pass the gradient of that loss is added to the
-- gradient arriving from the layers above.
local ContentLoss, parent = torch.class('nn.ContentLoss', 'nn.Module')

-- strength:  multiplier applied to the loss and to its gradient
-- target:    tensor the input is compared against
-- normalize: when true, the loss gradient is divided by its L1 norm
--            before it is scaled by strength
function ContentLoss:__init(strength, target, normalize)
  parent.__init(self)
  self.strength = strength
  self.target = target
  self.normalize = normalize or false
  self.loss = 0
  self.crit = nn.MSECriterion()
end

-- Record the loss for this input and hand the input on unchanged. If the
-- input does not have as many elements as the target the loss is skipped.
function ContentLoss:updateOutput(input)
  if input:nElement() == self.target:nElement() then
    self.loss = self.crit:forward(input, self.target) * self.strength
  else
    print('WARNING: Skipping content loss')
    -- Do not let a loss from an earlier call leak into the total.
    self.loss = 0
  end
  self.output = input
  return self.output
end

-- Return gradOutput plus the (optionally normalized) weighted gradient of
-- the content loss.
function ContentLoss:updateGradInput(input, gradOutput)
  if input:nElement() == self.target:nElement() then
    self.gradInput = self.crit:backward(input, self.target)
    if self.normalize then
      self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8)
    end
    self.gradInput:mul(self.strength)
    self.gradInput:add(gradOutput)
  else
    -- The loss was skipped on the forward pass, so there is nothing to add:
    -- pass the incoming gradient through instead of combining it with a
    -- stale or empty gradInput of a different size.
    self.gradInput:resizeAs(gradOutput):copy(gradOutput)
  end
  return self.gradInput
end

-- Build a small network that maps a C x H x W input to its C x C Gram
-- matrix: the input is flattened to C x (H*W), duplicated, and multiplied
-- with its own transpose.
function GramMatrix()
  local duplicate = nn.ConcatTable()
  duplicate:add(nn.Identity())
  duplicate:add(nn.Identity())

  local gram_net = nn.Sequential()
  gram_net:add(nn.View(-1):setNumInputDims(2))
  gram_net:add(duplicate)
  gram_net:add(nn.MM(false, true))
  return gram_net
end


-- nn Module that measures style loss in-place.
-- Forward: the input is passed through unchanged while the weighted MSE
-- between its normalized Gram matrix and a fixed target is stored in
-- self.loss. Backward: the gradient of that loss is added to gradOutput.
local StyleLoss, parent = torch.class('nn.StyleLoss', 'nn.Module')

-- strength:  multiplier applied to the loss and to its gradient
-- target:    Gram matrix the input's Gram matrix is compared against
-- normalize: when true, the loss gradient is divided by its L1 norm
function StyleLoss:__init(strength, target, normalize)
  parent.__init(self)
  self.strength = strength
  self.target = target
  self.normalize = normalize or false
  self.loss = 0

  self.crit = nn.MSECriterion()
  self.gram = GramMatrix()
  self.G = nil
end

function StyleLoss:updateOutput(input)
  -- Gram matrix of the input, divided by the number of input elements.
  local gram_matrix = self.gram:forward(input)
  gram_matrix:div(input:nElement())
  self.G = gram_matrix
  self.loss = self.crit:forward(gram_matrix, self.target) * self.strength
  self.output = input
  return self.output
end

function StyleLoss:updateGradInput(input, gradOutput)
  -- Gradient w.r.t. the Gram matrix, with the same division as in forward.
  local gram_grad = self.crit:backward(self.G, self.target)
  gram_grad:div(input:nElement())
  local grad = self.gram:backward(input, gram_grad)
  if self.normalize then
    grad:div(torch.norm(grad, 1) + 1e-8)
  end
  grad:mul(self.strength)
  grad:add(gradOutput)
  self.gradInput = grad
  return self.gradInput
end


-- nn Module adding a total-variation gradient to whatever gradient flows
-- through it. The forward pass is the identity; no loss value is stored.
local TVLoss, parent = torch.class('nn.TVLoss', 'nn.Module')

-- strength: multiplier applied to the total-variation gradient
function TVLoss:__init(strength)
  parent.__init(self)
  self.strength = strength
  -- Scratch buffers for the horizontal / vertical neighbour differences.
  self.x_diff = torch.Tensor()
  self.y_diff = torch.Tensor()
end

function TVLoss:updateOutput(input)
  self.output = input
  return self.output
end

-- TV loss backward pass inspired by kaishengtai/neuralart
-- input is indexed as C x H x W. The buffers are sized from the actual
-- channel count of the input instead of a fixed 3, so inputs with any number
-- of channels work.
function TVLoss:updateGradInput(input, gradOutput)
  self.gradInput:resizeAs(input):zero()
  local C, H, W = input:size(1), input:size(2), input:size(3)
  self.x_diff:resize(C, H - 1, W - 1)
  self.y_diff:resize(C, H - 1, W - 1)
  -- x_diff: each pixel minus its right-hand neighbour
  self.x_diff:copy(input[{{}, {1, -2}, {1, -2}}])
  self.x_diff:add(-1, input[{{}, {1, -2}, {2, -1}}])
  -- y_diff: each pixel minus the neighbour below it
  self.y_diff:copy(input[{{}, {1, -2}, {1, -2}}])
  self.y_diff:add(-1, input[{{}, {2, -1}, {1, -2}}])
  self.gradInput[{{}, {1, -2}, {1, -2}}]:add(self.x_diff):add(self.y_diff)
  self.gradInput[{{}, {1, -2}, {2, -1}}]:add(-1, self.x_diff)
  self.gradInput[{{}, {2, -1}, {1, -2}}]:add(-1, self.y_diff)
  self.gradInput:mul(self.strength)
  self.gradInput:add(gradOutput)
  return self.gradInput
end


-- Script entry point: parse the command-line arguments and run main on them.
main(cmd:parse(arg))
	--[[
	Neural-mirage 5
	by @htoyryla 3/2016, 3/2017

	based on neural-style by @jcjohnson

	for more info see
	http://liipetti.net/erratic/2016/03/31/i-have-seen-a-neural-mirage/
	http://liipetti.net/erratic/2016/04/20/getting-the-space-back/


	changes and new options vs neural-style:

	possible to use fc layers to generate content
	use -fc_weight to control how much

	relu layers can be used to control spatial arrangement,
	use -content_weight to control how much

	by default content_image is used as style image, too

	-label_file:
	path to a file containing the labels, used to print what the model is seeing in the image

	]]



	require 'torch'
	require 'nn'
	require 'image'
	require 'optim'

	require 'loadcaffe'

	--------------------------------------------------------------------------------

	local cmd = torch.CmdLine()

	-- Every option carries a help string so that the generated usage text
	-- explains what the option controls. The alternatives in the help
	-- strings are separated by a plain '|': '\|' is not a valid escape
	-- sequence in LuaJIT or Lua 5.2+ and would stop the file from loading.

	-- Basic options
	cmd:option('-style_image', 'none',
		'Style target image(s), comma-separated; "none" uses the content image')
	cmd:option('-style_blend_weights', 'nil',
		'Comma-separated blend weights, one per style image; "nil" weights them equally')
	cmd:option('-content_image', 'examples/inputs/tubingen.jpg',
		'Content target image')
	cmd:option('-image_size', 512, 'Maximum height / width of generated image')
	cmd:option('-gpu', 0, 'Zero-indexed ID of the GPU to use; for CPU mode set -gpu = -1')

	-- Optimization options
	cmd:option('-content_weight', 1.5, 'Weight of content losses on non-fc layers')
	cmd:option('-style_weight', 1e4, 'Weight of the style losses')
	cmd:option('-fc_weight', 1e6, 'Weight of content losses on layers whose name starts with "fc"')
	cmd:option('-tv_weight', 1e-3, 'Weight of total-variation regularization; values <= 0 disable it')
	cmd:option('-num_iterations', 2000, 'Number of optimization iterations')
	cmd:option('-normalize_gradients', false, 'L1-normalize the gradient of each content / style loss')
	cmd:option('-init', 'random', 'random|image')
	cmd:option('-optimizer', 'lbfgs', 'lbfgs|adam')
	cmd:option('-learning_rate', 0.1, 'Learning rate (used by adam only)')

	-- Output options
	cmd:option('-print_iter', 50, 'Print losses every this many iterations; 0 disables printing')
	cmd:option('-save_iter', 50, 'Save an intermediate image every this many iterations; 0 disables')
	cmd:option('-output_image', 'mirage-out.png', 'Path of the final output image')

	-- Other options
	cmd:option('-style_scale', 1.0, 'Factor applied to -image_size when scaling style images')
	cmd:option('-pooling', 'max', 'max|avg')
	cmd:option('-proto_file', 'models/vgg16_places_deploy_10.prototxt',
		'Caffe prototxt describing the network')
	cmd:option('-model_file', 'models/vgg16_places_snapshot_iter_765280.caffemodel',
		'Caffe model file with the network weights')
	cmd:option('-label_file', 'models/places205_categories.txt',
		'Text file with one class label per line; ignored if it cannot be opened')
	cmd:option('-backend', 'nn', 'nn|cudnn|clnn')
	cmd:option('-cudnn_autotune', false, 'Enable cudnn.benchmark (cudnn backend only)')
	cmd:option('-seed', -1, 'Random seed; negative values leave the generator unseeded')

	cmd:option('-content_layers', 'relu4_1,fc8', 'layers for content')
	cmd:option('-style_layers', 'relu2_1,relu3_1,relu4_1', 'layers for style')

	-- Class labels read from -label_file; filled in by main. Deliberately left
	-- global, as in the original script.
	labels = {}

	--- Build the loss-instrumented network and optimize an image against it.
	-- Loads the Caffe model, inserts TV / content / style loss modules at the
	-- requested layers, appends a SoftMax so that class probabilities can be
	-- printed, then runs L-BFGS or ADAM on the image while printing and saving
	-- intermediate results.
	-- @param params table of options as returned by cmd:parse
	local function main(params)
		print("Neural-mirage")

		-- Read the class labels, one per line. A label file that cannot be
		-- opened is not an error: printing of class probabilities is skipped.
		local label_file = io.open(params.label_file)
		local have_labels = label_file ~= nil
		if label_file then
			for line in label_file:lines() do
				table.insert(labels, line)
			end
			label_file:close()
		end

		local use_gpu = params.gpu >= 0
		if use_gpu then
			if params.backend ~= 'clnn' then
				require 'cutorch'
				require 'cunn'
				cutorch.setDevice(params.gpu + 1)
			else
				require 'clnn'
				require 'cltorch'
				cltorch.setDevice(params.gpu + 1)
			end
		else
			params.backend = 'nn'
		end
		local use_opencl = use_gpu and params.backend == 'clnn'

		-- Move a tensor or module to the selected device (no-op in CPU mode).
		-- Always use the return value: for tensors the conversion yields a new
		-- object rather than changing the receiver.
		local function to_backend(obj)
			if not use_gpu then
				return obj
			end
			if use_opencl then
				return obj:cl()
			end
			return obj:cuda()
		end

		-- Print every label whose probability in `p` exceeds 0.03. The loop is
		-- bounded by the size of `p` so a label file longer than the network
		-- output cannot index past the end of the tensor.
		local function print_seen(p)
			local count = math.min(#labels, p:size(1))
			for i = 1, count do
				if p[i] > 0.03 then
					print(string.format("%.4f %s", p[i], labels[i]))
				end
			end
		end

		if params.backend == 'cudnn' then
			require 'cudnn'
			if params.cudnn_autotune then
				cudnn.benchmark = true
			end
			cudnn.SpatialConvolution.accGradParameters = nn.SpatialConvolutionMM.accGradParameters -- ie: nop
		end

		local loadcaffe_backend = params.backend
		if params.backend == 'clnn' then loadcaffe_backend = 'nn' end
		local cnn = loadcaffe.load(params.proto_file, params.model_file, loadcaffe_backend):float()
		-- For CUDA the layers are moved one at a time as they are added to
		-- `net` below; only the OpenCL path converts the whole model up front.
		if use_opencl then
			cnn:cl()
		end

		local content_image = image.load(params.content_image, 3)
		content_image = image.scale(content_image, params.image_size, 'bilinear')
		local content_image_caffe = preprocess(content_image):float()

		-- Without an explicit style image the content image doubles as style.
		if params.style_image == "none" then
			params.style_image = params.content_image
		end

		local style_size = params.image_size
		local style_image_list = params.style_image:split(',')
		local style_images_caffe = {}
		for _, img_path in ipairs(style_image_list) do
			local style_img = image.load(img_path, 3)
			style_img = image.scale(style_img, style_size * params.style_scale, 'bilinear')
			table.insert(style_images_caffe, preprocess(style_img):float())
		end

		-- Handle style blending weights for multiple style inputs
		local style_blend_weights = nil
		if params.style_blend_weights == 'nil' then
			-- Style blending not specified, so use equal weighting
			style_blend_weights = {}
			for i = 1, #style_image_list do
				table.insert(style_blend_weights, 1.0)
			end
		else
			style_blend_weights = params.style_blend_weights:split(',')
			assert(#style_blend_weights == #style_image_list,
				'-style_blend_weights and -style_image must have the same number of elements')
		end
		-- Normalize the style blending weights so they sum to 1
		local style_blend_sum = 0
		for i = 1, #style_blend_weights do
			local weight = tonumber(style_blend_weights[i])
			assert(weight, '-style_blend_weights must be a comma-separated list of numbers')
			style_blend_weights[i] = weight
			style_blend_sum = style_blend_sum + weight
		end
		for i = 1, #style_blend_weights do
			style_blend_weights[i] = style_blend_weights[i] / style_blend_sum
		end

		content_image_caffe = to_backend(content_image_caffe)
		for i = 1, #style_images_caffe do
			style_images_caffe[i] = to_backend(style_images_caffe[i])
		end

		local content_layers = params.content_layers:split(",")
		local style_layers = params.style_layers:split(",")

		-- Set up the network, inserting style and content loss modules
		local content_losses, style_losses = {}, {}
		local next_content_idx, next_style_idx = 1, 1
		local net = nn.Sequential()
		if params.tv_weight > 0 then
			net:add(to_backend(nn.TVLoss(params.tv_weight):float()))
		end
		for i = 1, #cnn do
			local layer = cnn:get(i)
			local name = layer.name
			-- Declared at loop scope so that both branches below can test it.
			local layer_type = torch.type(layer)

			-- A 7x7 adaptive pooling goes in front of every nn.View; presumably
			-- this keeps the input of the fc layers at a fixed size for any
			-- image size.
			if layer_type == "nn.View" then
				net:add(to_backend(nn.SpatialAdaptiveMaxPooling(7, 7):float()))
			end

			if next_content_idx <= #content_layers or next_style_idx <= #style_layers then
				local is_pooling = (layer_type == 'cudnn.SpatialMaxPooling' or layer_type == 'nn.SpatialMaxPooling')
				if is_pooling and params.pooling == 'avg' then
					assert(layer.padW == 0 and layer.padH == 0)
					local kW, kH = layer.kW, layer.kH
					local dW, dH = layer.dW, layer.dH
					local avg_pool_layer = to_backend(nn.SpatialAveragePooling(kW, kH, dW, dH):float())
					local msg = 'Replacing max pooling at layer %d with average pooling'
					print(string.format(msg, i))
					net:add(avg_pool_layer)
				elseif layer_type ~= "nn.Dropout" then
					net:add(to_backend(layer))
				end

				-- The explicit nil checks keep a nameless layer from matching an
				-- exhausted layer list (nil == nil).
				if name ~= nil and name == content_layers[next_content_idx] then
					local target = net:forward(content_image_caffe):clone()
					local norm = params.normalize_gradients
					local cweight = params.content_weight
					if string.find(name, "fc") == 1 then -- this is an fc layer
						cweight = params.fc_weight
					end
					print("Setting up content layer", i, ":", name, cweight)
					local loss_module = to_backend(nn.ContentLoss(cweight, target, norm):float())
					net:add(loss_module)
					table.insert(content_losses, loss_module)
					next_content_idx = next_content_idx + 1
				end

				if name ~= nil and name == style_layers[next_style_idx] then
					print("Setting up style layer ", i, ":", name)
					local gram = to_backend(GramMatrix():float())
					-- The target is the blend-weighted sum of the normalized
					-- Gram matrices of all style images at this layer.
					local target = nil
					for j = 1, #style_images_caffe do
						local target_features = net:forward(style_images_caffe[j]):clone()
						local target_j = gram:forward(target_features):clone()
						target_j:div(target_features:nElement())
						target_j:mul(style_blend_weights[j])
						if j == 1 then
							target = target_j
						else
							target:add(target_j)
						end
					end
					local norm = params.normalize_gradients
					local loss_module = to_backend(nn.StyleLoss(params.style_weight, target, norm):float())
					net:add(loss_module)
					table.insert(style_losses, loss_module)
					next_style_idx = next_style_idx + 1
				end
			elseif layer_type ~= "nn.Dropout" then
				-- All loss layers are in place; keep the remaining layers so the
				-- net still reaches the classifier output, but leave Dropout out.
				net:add(to_backend(layer))
			end
		end

		--vgg16 places lacks softmax at the end, so insert one
		net:add(to_backend(nn.SoftMax():float()))

		-- We don't need the base CNN anymore, so clean it up to save memory.
		cnn = nil
		for i = 1, #net.modules do
			local module = net.modules[i]
			if torch.type(module) == 'nn.SpatialConvolutionMM' then
				-- remove these, not used, but uses gpu memory
				module.gradWeight = nil
				module.gradBias = nil
			end
		end
		collectgarbage()

		-- Initialize the image
		if params.seed >= 0 then
			torch.manualSeed(params.seed)
		end
		local img = nil
		if params.init == 'random' then
			img = torch.randn(content_image:size()):float():mul(0.001)
		elseif params.init == 'image' then
			img = content_image_caffe:float():clone()
		else
			error('Invalid init type')
		end
		img = to_backend(img)

		-- Try the network with the content image to see which classes it detects.
		if have_labels then
			local cimg = to_backend(content_image_caffe:clone():float())
			local p = net:forward(cimg)
			print("----------- seeing the features --------------")
			print_seen(p)
		end

		-- Run it through the network once to get the proper size for the gradient
		-- All the gradients will come from the extra loss modules, so we just pass
		-- zeros into the top of the net on the backward pass.
		local y = net:forward(img)
		local dy = img.new(#y):zero()

		local optim_state = nil
		if params.optimizer == 'lbfgs' then
			optim_state = {
				maxIter = params.num_iterations,
				verbose = true,
			}
		elseif params.optimizer == 'adam' then
			optim_state = {
				learningRate = params.learning_rate,
			}
		else
			error(string.format('Unrecognized optimizer "%s"', params.optimizer))
		end

		-- Print the individual and total losses (and the detected classes when
		-- labels are available) every -print_iter evaluations.
		local function maybe_print(t, loss, p)
			local verbose = (params.print_iter > 0 and t % params.print_iter == 0)
			if not verbose then
				return
			end
			print(string.format('Iteration %d / %d', t, params.num_iterations))
			for i, loss_module in ipairs(content_losses) do
				print(string.format(' Content %d loss: %f', i, loss_module.loss))
			end
			for i, loss_module in ipairs(style_losses) do
				print(string.format(' Style %d loss: %f', i, loss_module.loss))
			end
			print(string.format(' Total loss: %f', loss))
			if have_labels then
				print_seen(p)
			end
		end

		-- Save the current image every -save_iter evaluations under a numbered
		-- name, and under -output_image itself at evaluation -num_iterations.
		local function maybe_save(t)
			local is_last = (t == params.num_iterations)
			local should_save = (params.save_iter > 0 and t % params.save_iter == 0) or is_last
			if not should_save then
				return
			end
			local disp = deprocess(img:double())
			disp = image.minmax{tensor=disp, min=0, max=1}
			local filename = build_filename(params.output_image, t)
			if is_last then
				filename = params.output_image
			end
			image.save(filename, disp)
		end

		-- Function to evaluate loss and gradient. We run the net forward and
		-- backward to get the gradient, and sum up losses from the loss modules.
		-- optim.lbfgs internally handles iteration and calls this function many
		-- times, so we manually count the number of iterations to handle printing
		-- and saving intermediate results.
		local num_calls = 0
		local function feval(x)
			num_calls = num_calls + 1
			local p = net:forward(x)

			local grad = net:updateGradInput(x, dy)
			local loss = 0
			for _, mod in ipairs(content_losses) do
				loss = loss + mod.loss
			end
			for _, mod in ipairs(style_losses) do
				loss = loss + mod.loss
			end
			maybe_print(num_calls, loss, p)
			maybe_save(num_calls)

			collectgarbage()
			-- optim.lbfgs expects a vector for gradients
			return loss, grad:view(grad:nElement())
		end

		-- Run optimization.
		if params.optimizer == 'lbfgs' then
			print('Running optimization with L-BFGS')
			optim.lbfgs(feval, img, optim_state)
		elseif params.optimizer == 'adam' then
			print('Running optimization with ADAM')
			for _ = 1, params.num_iterations do
				optim.adam(feval, img, optim_state)
			end
		end
	end


	--- Derive the file name of an intermediate result.
	-- The iteration number is inserted between the base name and the extension
	-- of `output_image`, and the directory part is kept in front.
	-- @param output_image path of the final output image
	-- @param iteration    iteration number to embed in the name
	-- @return path of the form "<dir>/<base>_<iteration>.<ext>"
	function build_filename(output_image, iteration)
		local extension = paths.extname(output_image)
		local stem = paths.basename(output_image, extension)
		local folder = paths.dirname(output_image)
		local numbered = string.format('%s_%d.%s', stem, iteration, extension)
		return string.format('%s/%s', folder, numbered)
	end


	-- Prepare an image for a Caffe model: reverse the order of the three
	-- channels (RGB to BGR), multiply by 256.0 and subtract the per-channel mean
	-- pixel. The argument itself is left untouched; a new tensor is returned.
	function preprocess(img)
		local channel_order = torch.LongTensor{3, 2, 1}
		local bgr = img:index(1, channel_order)
		bgr:mul(256.0)
		local mean = torch.DoubleTensor({103.939, 116.779, 123.68}):view(3, 1, 1)
		bgr:add(-1, mean:expandAs(bgr))
		return bgr
	end


	-- Inverse of preprocess: add the per-channel mean pixel back, reverse the
	-- channel order along dimension 1, and divide the values by 256.
	function deprocess(img)
		local mean_pixel = torch.DoubleTensor({103.939, 116.779, 123.68})
		local shifted = img + mean_pixel:view(3, 1, 1):expandAs(img)
		local rgb_order = torch.LongTensor{3, 2, 1}
		return shifted:index(1, rgb_order):div(256.0)
	end


	-- Define an nn Module to compute content loss in-place.
	-- The module passes its input through unchanged; the loss against the
	-- stored target is kept in self.loss and its gradient is added to the
	-- incoming gradient during the backward pass.
	local ContentLoss, parent = torch.class('nn.ContentLoss', 'nn.Module')

	-- strength:  multiplier applied to the loss and to its gradient
	-- target:    tensor the input is compared against
	-- normalize: when true, the loss gradient is divided by its L1 norm
	function ContentLoss:__init(strength, target, normalize)
		parent.__init(self)
		self.strength = strength
		self.target = target
		self.normalize = normalize or false
		self.loss = 0
		self.crit = nn.MSECriterion()
	end

	-- Computes strength * MSE(input, target) into self.loss and returns the
	-- input itself. When the element counts differ the loss is skipped and
	-- reported as 0 so that a stale value is never summed by the caller.
	function ContentLoss:updateOutput(input)
		if input:nElement() == self.target:nElement() then
			self.loss = self.crit:forward(input, self.target) * self.strength
		else
			print('WARNING: Skipping content loss')
			self.loss = 0
		end
		self.output = input
		return self.output
	end

	-- Returns gradOutput plus the (optionally normalized) scaled loss gradient.
	-- When the loss was skipped because of a size mismatch, gradOutput is
	-- passed through untouched instead of being added to a stale or empty
	-- gradInput buffer.
	function ContentLoss:updateGradInput(input, gradOutput)
		if input:nElement() ~= self.target:nElement() then
			self.gradInput = gradOutput
			return self.gradInput
		end
		self.gradInput = self.crit:backward(input, self.target)
		if self.normalize then
			-- 1e-8 guards against division by zero for an all-zero gradient
			self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8)
		end
		self.gradInput:mul(self.strength)
		self.gradInput:add(gradOutput)
		return self.gradInput
	end

	-- Build a network that maps a C x H x W input to its C x C Gram matrix:
	-- the two trailing dimensions are flattened, the result is duplicated,
	-- and the copy is multiplied by its own transpose.
	function GramMatrix()
		local duplicate = nn.ConcatTable()
		duplicate:add(nn.Identity())
		duplicate:add(nn.Identity())

		local gram_net = nn.Sequential()
		gram_net:add(nn.View(-1):setNumInputDims(2))
		gram_net:add(duplicate)
		gram_net:add(nn.MM(false, true))
		return gram_net
	end


	-- Define an nn Module to compute style loss in-place.
	-- The module passes its input through unchanged. The loss is the MSE
	-- between the input's Gram matrix (divided by the input element count)
	-- and the stored target, scaled by strength and kept in self.loss.
	local StyleLoss, parent = torch.class('nn.StyleLoss', 'nn.Module')

	-- strength:  multiplier applied to the loss and to its gradient
	-- target:    Gram matrix the input's Gram matrix is compared against
	-- normalize: when true, the loss gradient is divided by its L1 norm
	function StyleLoss:__init(strength, target, normalize)
		parent.__init(self)
		self.strength = strength
		self.target = target
		self.normalize = normalize or false
		self.loss = 0

		self.G = nil
		self.gram = GramMatrix()
		self.crit = nn.MSECriterion()
	end

	-- Stores the normalized Gram matrix in self.G, updates self.loss and
	-- returns the input itself.
	function StyleLoss:updateOutput(input)
		local gram = self.gram:forward(input)
		gram:div(input:nElement())
		self.G = gram
		self.loss = self.crit:forward(gram, self.target) * self.strength
		self.output = input
		return self.output
	end

	-- Back-propagates the criterion gradient through the Gram network,
	-- optionally normalizes it, scales it by strength and adds gradOutput.
	function StyleLoss:updateGradInput(input, gradOutput)
		local gram_grad = self.crit:backward(self.G, self.target)
		gram_grad:div(input:nElement())

		local grad = self.gram:backward(input, gram_grad)
		if self.normalize then
			grad:div(torch.norm(grad, 1) + 1e-8)
		end
		grad:mul(self.strength):add(gradOutput)

		self.gradInput = grad
		return self.gradInput
	end


	-- nn Module that adds a smoothness gradient to the backward pass.
	-- The forward pass is the identity and no loss value is recorded; the
	-- backward pass adds strength times a gradient built from differences
	-- between horizontally and vertically neighbouring pixels.
	local TVLoss, parent = torch.class('nn.TVLoss', 'nn.Module')

	-- strength: multiplier applied to the smoothness gradient
	function TVLoss:__init(strength)
		parent.__init(self)
		self.strength = strength
		-- Scratch buffers for the neighbour differences, resized on each call
		self.x_diff = torch.Tensor()
		self.y_diff = torch.Tensor()
	end

	-- Identity: returns the input unchanged.
	function TVLoss:updateOutput(input)
		self.output = input
		return self.output
	end

	-- TV loss backward pass inspired by kaishengtai/neuralart.
	-- input is indexed as C x H x W. The scratch buffers follow the input's
	-- own channel count instead of assuming three channels. Inputs with
	-- fewer than two rows or columns have no neighbouring pairs, so only
	-- gradOutput is propagated for them.
	function TVLoss:updateGradInput(input, gradOutput)
		self.gradInput:resizeAs(input):zero()
		local C, H, W = input:size(1), input:size(2), input:size(3)
		if H > 1 and W > 1 then
			self.x_diff:resize(C, H - 1, W - 1)
			self.y_diff:resize(C, H - 1, W - 1)
			-- x_diff: pixel minus its right-hand neighbour
			self.x_diff:copy(input[{{}, {1, -2}, {1, -2}}])
			self.x_diff:add(-1, input[{{}, {1, -2}, {2, -1}}])
			-- y_diff: pixel minus the neighbour one row below
			self.y_diff:copy(input[{{}, {1, -2}, {1, -2}}])
			self.y_diff:add(-1, input[{{}, {2, -1}, {1, -2}}])
			-- Each difference pushes its two pixels in opposite directions
			self.gradInput[{{}, {1, -2}, {1, -2}}]:add(self.x_diff):add(self.y_diff)
			self.gradInput[{{}, {1, -2}, {2, -1}}]:add(-1, self.x_diff)
			self.gradInput[{{}, {2, -1}, {1, -2}}]:add(-1, self.y_diff)
		end
		self.gradInput:mul(self.strength)
		self.gradInput:add(gradOutput)
		return self.gradInput
	end


	-- Script entry point: parse the command-line arguments with the options
	-- registered on cmd and run main with the resulting table.
	local parsed_options = cmd:parse(arg)
	main(parsed_options)