-- Source: GitHub Gist andreaskoepf/28e7ec7e45bdcecb8fe7 by @andreaskoepf,
-- created September 16, 2015 00:57.
-- Description: mini single layer denoising auto-encoder experiment
require 'image'
require 'nngraph'
require 'optim'
-- mini single layer denoising auto-encoder experiment
-- (v1: no weight tying yet)
-- Andreas Köpf 2015-09-16
-- Training image and its dimensions (image.lena() yields 3x512x512).
input = image.lena()
input_size = input:size()

-- Reconstruction error: mean squared error between decoder output and targets.
mse = nn.MSECriterion()

-- Convolution kernel width/height and number of encoder feature maps.
kw, kh, filters = 7, 7, 8

-- Encoder: corrupt the input (dropout randomly zeroes 90% of the values),
-- then apply a single kw x kh convolution followed by PReLU.
encoder = nn.Sequential()
  :add(nn.Dropout(0.9))
  :add(nn.SpatialConvolution(3, filters, kw, kh))
  :add(nn.PReLU())

-- Decoder: a 1x1 convolution emitting kw*kh*3 values per position, i.e. one
-- full 3-channel kw x kh input patch per encoder output location.
decoder = nn.Sequential()
  :add(nn.SpatialConvolution(filters, kw*kh*3, 1, 1))
  :add(nn.PReLU())

-- Wire encoder and decoder into a single nngraph module.
input_node = nn.Identity()()
encoder_out = encoder(input_node)
decoder_out = decoder(encoder_out)
net = nn.gModule({ input_node }, { decoder_out })

-- Flattened views on all trainable parameters and their gradients.
weights, gradient = net:getParameters()

-- Optimizer state handed to optim.rmsprop on every step.
rmsprop_state = { learningRate = 0.01, alpha = 0.5 }
-- Reconstruction targets, built lazily on the first evaluation and reused
-- afterwards: they depend only on `input`, `kh` and `kw`, never on the weights.
local patch_targets

-- Loss function of the weights to optimize (objective handed to optim.rmsprop).
-- @param w  flat parameter tensor proposed by the optimizer
-- @return   MSE loss of the reconstruction, and the flat gradient tensor
local optimization_target = function(w)
  if w ~= weights then
    weights:copy(w)
  end
  gradient:zero()

  -- generates one (kw*kh*3)-element vector per valid convolution position
  local reconstruction = net:forward(input)

  if patch_targets == nil then
    patch_targets = torch.Tensor():resizeAs(reconstruction)
    -- very slow part: create the target for each inverted conv operation
    for y = 1, input_size[2] - kh + 1 do
      for x = 1, input_size[3] - kw + 1 do
        -- the 3 x kh x kw input patch read by the conv kernel at (y, x)
        patch_targets[{{}, y, x}] = input[{{}, {y, y+kh-1}, {x, x+kw-1}}]
      end
    end
  end

  -- calc error and backprop
  local loss = mse:forward(reconstruction, patch_targets)
  -- backward takes the tensor that was fed to forward, not the network output
  net:backward(input, mse:backward(reconstruction, patch_targets))
  return loss, gradient
end
-- Another very slow function: rebuilds an image from per-position patch
-- predictions by averaging every predicted value that lands on a pixel.
-- @param net_output  tensor of size (3*kh*kw) x rows x cols; the vector at
--                    (y, x) is viewed as the 3 x kh x kw patch whose top-left
--                    corner sits at pixel (y, x)
-- @return            tensor of size input_size holding the per-pixel averages
function create_avg_reconstruction(net_output)
  -- number of patches covering each pixel; correctly handles border values
  local counts = torch.zeros(input_size)
  local avg = torch.zeros(input_size)
  -- take the extents from the tensor itself instead of assuming a 512x512 image
  local rows, cols = net_output:size(2), net_output:size(3)
  for y = 1, rows do
    for x = 1, cols do
      -- clone() yields a contiguous copy of the strided channel vector so that
      -- view() can reshape it
      local v = net_output[{{}, y, x}]:clone():view(3, kh, kw)
      local idx = {{}, {y, y+kh-1}, {x, x+kw-1}}
      counts[idx]:add(1)
      avg[idx]:add(v)
    end
  end
  return avg:cdiv(counts)
end
-- Renders the encoder's convolution kernels as one 2D image: one column block
-- per filter, one row block per input channel.
-- @return  tensor of size (kh*3) x (kw*filters); each filter is rescaled
--          independently to roughly the [0, 1] range
function create_filter_image()
  local kernels = encoder:get(2).weight:view(filters, 3, kh, kw)
  local canvas = torch.Tensor(kh*3, kw*filters)
  for filter_idx = 1, filters do
    local kernel = kernels[filter_idx]:clone()
    -- stretch the filter's value range so its structure is easier to inspect;
    -- the small epsilon guards against division by zero for constant filters
    local lo, hi = kernel:min(), kernel:max()
    kernel:add(-lo)
    kernel:div(hi - lo + 1e-10)
    local col_first, col_last = (filter_idx-1)*kw + 1, filter_idx*kw
    for channel = 1, 3 do -- input channels
      local row_first, row_last = (channel-1)*kh + 1, channel*kh
      canvas[{{row_first, row_last}, {col_first, col_last}}] = kernel[channel]
    end
  end
  return canvas
end
-- TRAINING, run a fixed number of rmsprop steps to get some structure in the
-- filter-kernel output dump
local training_steps = 100
net:training()
for step = 1, training_steps do
  -- optim.rmsprop returns the updated parameters and a table holding the loss
  local _, loss = optim.rmsprop(optimization_target, weights, rmsprop_state)
  print(string.format('%d: %f', step, loss[1]))
end
-- EVALUATION
-- net:evaluate() is intentionally not called: staying in training mode keeps
-- the corruption of the input active for this forward pass.
local patch_predictions = net:forward(input)
auto_encoder_out = create_avg_reconstruction(patch_predictions)
image.saveJPG('output.jpg', auto_encoder_out)

-- DUMP FILTER KERNEL IMAGE (enlarged 5x in both directions)
local weight_image = create_filter_image()
local zoom = 5
local scaled_width = weight_image:size(2) * zoom
local scaled_height = weight_image:size(1) * zoom
image.saveJPG('filters.jpg', image.scale(weight_image, scaled_width, scaled_height))
-- Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment