andreaskoepf/conv_autoencoder.lua

## conv_autoencoder.lua
require 'image'
require 'nngraph'
require 'optim'

-- mini single layer denoising auto-encoder experiment
-- (v1: no weight tying yet)
-- Andreas Köpf 2015-09-16

-- Training image (also the reconstruction target) and the loss criterion.
input = image.lena() -- 3x512x512
input_size = input:size()
mse = nn.MSECriterion()

-- Convolution kernel width / height and number of encoder feature maps.
kw = 7
kh = 7
filters = 8

-- Encoder: corrupt the input with dropout (drop probability 0.9), then
-- convolve the 3 input planes into `filters` feature maps.
encoder = nn.Sequential()
  :add(nn.Dropout(0.9))
  :add(nn.SpatialConvolution(3, filters, kw, kh))
  :add(nn.PReLU())

-- Decoder: a 1x1 convolution that emits kw*kh*3 planes per position, i.e. one
-- value for every input element covered by the encoder kernel at that position.
decoder = nn.Sequential()
  :add(nn.SpatialConvolution(filters, kw*kh*3, 1, 1))
  :add(nn.PReLU())

-- Wire encoder and decoder into a single nngraph module.
input_node = nn.Identity()()
encoder_out = encoder(input_node)
decoder_out = decoder(encoder_out)
net = nn.gModule({ input_node }, { decoder_out })

-- Flattened views of all parameters / gradients, plus the optimizer state
-- table that optim.rmsprop receives on every step.
weights, gradient = net:getParameters()
rmsprop_state = { alpha = 0.5, learningRate = 0.01 }

-- Objective handed to optim.rmsprop: evaluates the auto-encoder on `input`
-- with the given parameters.
--   w: flat parameter tensor proposed by the optimizer
-- Returns the MSE loss and `gradient` (the flat gradient tensor obtained from
-- net:getParameters()). As before, `reconstruction` and `targets` are left
-- behind as globals.
local optimization_target = function(w)
  -- copy only when the optimizer hands us a tensor other than `weights` itself
  if w ~= weights then
    weights:copy(w)
  end
  gradient:zero()

  -- (3*kh*kw) x (H-kh+1) x (W-kw+1), i.e. 147x506x506 for the 3x512x512 input
  reconstruction = net:forward(input)

  -- The target at output position (y, x) is the flattened 3 x kh x kw input
  -- patch read by the encoder kernel at that position.
  targets = torch.Tensor():resizeAs(reconstruction)
  local out_h, out_w = reconstruction:size(2), reconstruction:size(3)

  -- unfold yields every kh x kw window as C x out_h x out_w x kh x kw without
  -- a Lua-level loop; permuting to C x kh x kw x out_h x out_w matches the
  -- channel layout of `targets` (channel = ((c-1)*kh + (i-1))*kw + j).
  local patches = input:unfold(2, kh, 1):unfold(3, kw, 1)
  targets:view(input_size[1], kh, kw, out_h, out_w)
         :copy(patches:permute(1, 4, 5, 2, 3))

  local loss = mse:forward(reconstruction, targets)
  -- backward expects the same input that was given to forward
  net:backward(input, mse:backward(reconstruction, targets))
  return loss, gradient
end

-- Folds the per-position patch predictions back into an image.
--   net_output: (C*kh*kw) x out_h x out_w tensor; the channels at (y, x) hold
--               the predicted C x kh x kw input patch whose top-left corner
--               is (y, x)
-- Returns a tensor of size `input_size` in which every pixel is the mean of
-- all patch predictions that cover it (border pixels are covered by fewer
-- patches, hence the explicit `counts`).
function create_avg_reconstruction(net_output)
  local channels = input_size[1]
  local out_h, out_w = net_output:size(2), net_output:size(3)
  local counts = torch.zeros(input_size)
  local avg = torch.zeros(input_size)

  -- taps[c][i][j][y][x] is the prediction for input pixel (c, y+i-1, x+j-1)
  local taps = net_output:contiguous():view(channels, kh, kw, out_h, out_w)

  -- Iterate over the kh*kw kernel taps instead of every output position:
  -- each tap contributes one shifted out_h x out_w slab per channel. The
  -- result equals the per-position accumulation up to floating-point
  -- summation order.
  for dy = 1, kh do
    for dx = 1, kw do
      local idx = {{}, {dy, dy+out_h-1}, {dx, dx+out_w-1}}
      counts[idx]:add(1)
      avg[idx]:add(taps[{{}, dy, dx, {}, {}}])
    end
  end

  return avg:cdiv(counts)
end

-- Renders the encoder convolution kernels as one 2D image: one column of
-- tiles per filter, one row of tiles per input channel. Each filter is
-- rescaled to [0, 1] over all of its channels before being drawn.
-- Returns a (kh*3) x (kw*filters) tensor.
function create_filter_image()
  local channels = 3
  local kernels = encoder:get(2).weight:view(filters, channels, kh, kw)
  local sheet = torch.Tensor(kh*channels, kw*filters)

  for col = 1, filters do
    local kernel = kernels[col]:clone()

    -- min/max normalization; the epsilon guards against a constant kernel
    local lo, hi = kernel:min(), kernel:max()
    kernel:add(-lo):div(hi-lo+1e-10)

    local x0 = (col-1)*kw
    for row = 1, channels do
      local y0 = (row-1)*kh
      sheet[{{y0+1, y0+kh}, {x0+1, x0+kw}}] = kernel[row]
    end
  end

  return sheet
end

-- TRAINING: run 100 RMSprop steps to get some structure into the filter
-- kernels before dumping them.
net:training()
for step = 1, 100 do
  local _, fx = optim.rmsprop(optimization_target, weights, rmsprop_state)
  print(string.format('%d: %f', step, fx[1]))
end

-- EVALUATION
-- net:evaluate() is intentionally not called: staying in training mode keeps
-- the dropout layer active, so the input is still corrupted.
net:forward(input)
auto_encoder_out = create_avg_reconstruction(net.output)
image.saveJPG('output.jpg', auto_encoder_out)

-- DUMP FILTER KERNEL IMAGE (upscaled for easier inspection)
local kernel_sheet = create_filter_image()
local zoom = 5
local zoomed_w = kernel_sheet:size(2) * zoom
local zoomed_h = kernel_sheet:size(1) * zoom
image.saveJPG('filters.jpg', image.scale(kernel_sheet, zoomed_w, zoomed_h))
	require 'image'
	require 'nngraph'
	require 'optim'

	-- mini single layer denoising auto-encoder experiment
	-- (v1: no weight tying yet)
	-- Andreas Köpf 2015-09-16

	-- Training image (also the reconstruction target) and the loss criterion.
	input = image.lena() -- 3x512x512
	input_size = input:size()
	mse = nn.MSECriterion()

	-- kernel size + channel count
	kw, kh, filters = 7, 7, 8

	-- Encoder: corrupt the input with dropout, then convolve the 3 input
	-- planes into `filters` feature maps.
	encoder = nn.Sequential()
	encoder:add(nn.Dropout(0.9)) -- zero out randomly 90% of input
	encoder:add(nn.SpatialConvolution(3, filters, kw, kh))
	encoder:add(nn.PReLU())

	-- Decoder: a 1x1 convolution that emits kw*kh*3 planes per position, i.e.
	-- one value for every input element covered by the encoder kernel there.
	decoder = nn.Sequential()
	decoder:add(nn.SpatialConvolution(filters, kw*kh*3, 1, 1))
	decoder:add(nn.PReLU())

	-- Wire encoder and decoder into a single nngraph module.
	input_node = nn.Identity()()
	encoder_out = encoder(input_node)
	decoder_out = decoder(encoder_out)
	net = nn.gModule({ input_node }, { decoder_out })

	-- Flattened views of all parameters / gradients, plus the optimizer state.
	weights, gradient = net:getParameters()
	rmsprop_state = { learningRate = 0.01, alpha = 0.5 }

	-- Objective handed to optim.rmsprop: evaluates the auto-encoder on `input`
	-- with the given parameters.
	--   w: flat parameter tensor proposed by the optimizer
	-- Returns the MSE loss and `gradient` (the flat gradient tensor obtained
	-- from net:getParameters()). As before, `reconstruction` and `targets` are
	-- left behind as globals.
	local optimization_target = function(w)
	  -- copy only when the optimizer hands us a tensor other than `weights`
	  if w ~= weights then
	    weights:copy(w)
	  end
	  gradient:zero()

	  -- (3*kh*kw) x (H-kh+1) x (W-kw+1), i.e. 147x506x506 for the 3x512x512 input
	  reconstruction = net:forward(input)

	  -- The target at output position (y, x) is the flattened 3 x kh x kw input
	  -- patch read by the encoder kernel at that position.
	  targets = torch.Tensor():resizeAs(reconstruction)
	  local out_h, out_w = reconstruction:size(2), reconstruction:size(3)

	  -- unfold yields every kh x kw window as C x out_h x out_w x kh x kw
	  -- without a Lua-level loop; permuting to C x kh x kw x out_h x out_w
	  -- matches the channel layout of `targets`
	  -- (channel = ((c-1)*kh + (i-1))*kw + j).
	  local patches = input:unfold(2, kh, 1):unfold(3, kw, 1)
	  targets:view(input_size[1], kh, kw, out_h, out_w)
	         :copy(patches:permute(1, 4, 5, 2, 3))

	  local loss = mse:forward(reconstruction, targets)
	  -- backward expects the same input that was given to forward
	  net:backward(input, mse:backward(reconstruction, targets))
	  return loss, gradient
	end

	-- Folds the per-position patch predictions back into an image.
	--   net_output: (C*kh*kw) x out_h x out_w tensor; the channels at (y, x)
	--               hold the predicted C x kh x kw input patch whose top-left
	--               corner is (y, x)
	-- Returns a tensor of size `input_size` in which every pixel is the mean
	-- of all patch predictions that cover it (border pixels are covered by
	-- fewer patches, hence the explicit `counts`).
	function create_avg_reconstruction(net_output)
	  local channels = input_size[1]
	  local out_h, out_w = net_output:size(2), net_output:size(3)
	  local counts = torch.zeros(input_size)
	  local avg = torch.zeros(input_size)

	  -- taps[c][i][j][y][x] is the prediction for input pixel (c, y+i-1, x+j-1)
	  local taps = net_output:contiguous():view(channels, kh, kw, out_h, out_w)

	  -- Iterate over the kh*kw kernel taps instead of every output position:
	  -- each tap contributes one shifted out_h x out_w slab per channel. The
	  -- result equals the per-position accumulation up to floating-point
	  -- summation order.
	  for dy = 1, kh do
	    for dx = 1, kw do
	      local idx = {{}, {dy, dy+out_h-1}, {dx, dx+out_w-1}}
	      counts[idx]:add(1)
	      avg[idx]:add(taps[{{}, dy, dx, {}, {}}])
	    end
	  end

	  return avg:cdiv(counts)
	end

	-- Renders the encoder convolution kernels as one 2D image: one column of
	-- tiles per filter, one row of tiles per input channel. Each filter is
	-- rescaled to [0, 1] over all of its channels before being drawn.
	-- Returns a (kh*3) x (kw*filters) tensor.
	function create_filter_image()
	  local w = encoder:get(2).weight:view(filters, 3, kh, kw)
	  local img = torch.Tensor(kh*3, kw*filters)
	  for i=1,filters do
	    local f = w[i]:clone()
	    -- normalize for better inspection; the epsilon guards against a
	    -- constant kernel
	    local lb,ub = f:min(), f:max()
	    f:add(-lb):div(ub-lb+1e-10)
	    for j=1,3 do -- input channels
	      img[{{(j-1)*kh+1, j*kh}, {(i-1)*kw+1, i*kw}}] = f[j]
	    end
	  end
	  return img
	end

	-- TRAINING: run 100 RMSprop steps to get some structure into the filter
	-- kernels before dumping them.
	net:training()
	for step = 1, 100 do
	  local _, fx = optim.rmsprop(optimization_target, weights, rmsprop_state)
	  print(string.format('%d: %f', step, fx[1]))
	end

	-- EVALUATION
	-- net:evaluate() is intentionally not called: staying in training mode
	-- keeps the dropout layer active, so the input is still corrupted.
	net:forward(input)
	auto_encoder_out = create_avg_reconstruction(net.output)
	image.saveJPG('output.jpg', auto_encoder_out)

	-- DUMP FILTER KERNEL IMAGE (upscaled for easier inspection)
	local kernel_sheet = create_filter_image()
	local zoom = 5
	local zoomed_w = kernel_sheet:size(2) * zoom
	local zoomed_h = kernel_sheet:size(1) * zoom
	image.saveJPG('filters.jpg', image.scale(kernel_sheet, zoomed_w, zoomed_h))