Skip to content

Instantly share code, notes, and snippets.

@JCBrouwer
Last active April 14, 2018 10:43
Show Gist options
  • Save JCBrouwer/727a273be3156f0fb927df0cd978e948 to your computer and use it in GitHub Desktop.
Save JCBrouwer/727a273be3156f0fb927df0cd978e948 to your computer and use it in GitHub Desktop.
Revised version of manuelruder's artistic-videos. It relies on https://github.com/ProGamerGov/Neural-Tools being cloned next to the artistic-videos directory. Frames are scaled up as more passes are made, and each pass starts stylization on a different frame to encourage seamlessly looping videos.
require 'torch'
require 'nn'
require 'image'
require 'optim'
require 'loadcaffe'
require 'artistic_video_core'
local flowFile = require 'flowFileLoader'
--------------------------------------------------------------------------------
-- Command-line interface.
-- Every entry is { key, default, help }; entries without a help text simply
-- leave the third slot empty. They are registered in the listed order.
local cmd = torch.CmdLine()

local flowPatternHelp =
  'Flow file pattern. [.] will be replaced with the "from"-index, {.} with the "to"-index.'

local optionSpecs = {
  -- Basic options
  {'-style_image', 'example/seated-nude.jpg', 'Style target image'},
  {'-style_blend_weights', 'nil'},
  {'-content_pattern', 'example/marple8_%02d.ppm', 'Content target pattern'},
  {'-num_images', 0, 'Number of content images. Set 0 for autodetect.'},
  {'-start_number', 1, 'Frame index to start with'},
  {'-gpu', 0, 'Zero-indexed ID of the GPU to use; for CPU mode set -gpu = -1'},
  {'-number_format', '%d', 'Number format of the output images.'},

  -- Flow options
  {'-forwardFlow_pattern', 'example/deepflow/forward_[%d]_{%d}.flo', flowPatternHelp},
  {'-backwardFlow_pattern', 'example/deepflow/backward_[%d]_{%d}.flo', flowPatternHelp},
  {'-forwardFlow_weight_pattern', 'example/deepflow/reliable_[%d]_{%d}.pgm', flowPatternHelp},
  {'-backwardFlow_weight_pattern', 'example/deepflow/reliable_[%d]_{%d}.pgm', flowPatternHelp},

  -- Multi-pass options
  {'-blendWeight', 1.0, ''},
  {'-blendWeight_lastPass', 0.0, ''},
  {'-use_temporalLoss_after', 8, ''},
  {'-num_passes', 15, 'Number of passes'},
  {'-continue_with_pass', 1, ''},

  -- Optimization options
  {'-content_weight', 5e0},
  {'-style_weight', 5e3},
  {'-temporal_weight', 5.5e3},
  {'-tv_weight', 1e-3},
  {'-temporal_loss_criterion', 'smoothl1', 'mse|smoothl1'},
  {'-num_iterations', 100, 'Number of iterations per pass'},
  {'-tol_loss_relative', 0, 'stop if relative change of the loss function is below this value'},
  {'-tol_loss_relative_interval', 100, 'interval between two function comparisons'},
  {'-normalize_gradients', true},
  {'-init', 'image', 'random|image|prevWarped'},
  {'-optimizer', 'lbfgs', 'lbfgs|adam'},
  {'-learning_rate', 1e1},

  -- Output options
  {'-print_iter', 0},
  {'-save_iter', 0},
  {'-output_image', 'out.png'},
  {'-output_folder', ''},
  {'-save_init', false, 'Whether the initialization image should be saved (for debugging purposes).'},

  -- Other options
  {'-style_scale', 1.0},
  {'-pooling', 'max', 'max|avg'},
  {'-proto_file', 'models/VGG_ILSVRC_19_layers_deploy.prototxt'},
  {'-model_file', 'models/VGG_ILSVRC_19_layers.caffemodel'},
  {'-backend', 'cudnn', 'nn|cudnn|clnn'},
  {'-cudnn_autotune', true},
  {'-seed', -1},
  {'-content_layers', 'relu4_2', 'layers for content'},
  {'-style_layers', 'relu1_1,relu2_1,relu3_1,relu4_1,relu5_1', 'layers for style'},
  {'-args', '', 'Arguments in a file, one argument per line'},
}

for _, spec in ipairs(optionSpecs) do
  cmd:option(spec[1], spec[2], spec[3])
end
-- Replace the parameter-gradient accumulation of SpatialConvolutionMM with an
-- empty method: the network built by this script does not need it.
nn.SpatialConvolutionMM.accGradParameters = function(self)
  -- intentionally empty
end
-- Adds one flow-warped neighbour frame to the running blend.
--   img             accumulator image; modified in place
--   divisor         running sum of blend weights; modified in place
--   weightsFileName path of the flow weight image to load
--   warpedScaled    warped neighbour frame, already scaled to W x H
--   blendWeight     scalar applied to the loaded weights
--   W, H            target width and height passed to image.scale
--   printSizes      when true, also print the second dimension of the
--                   scaled weights, the warped frame and their product
local function blendWarpedNeighbor(img, divisor, weightsFileName, warpedScaled, blendWeight, W, H, printSizes)
  print(string.format('Reading flowWeights file "%s".', weightsFileName))
  local weights = image.load(weightsFileName)
  -- Scale BEFORE expanding: an expanded tensor is a stride-0 view, so an
  -- in-place mul on it would hit the shared storage once per channel.
  weights:mul(blendWeight)
  weights = weights:expand(3, weights:size(2), weights:size(3))
  local weightsScaled = image.scale(weights, W, H)
  local weighted = torch.cmul(warpedScaled, weightsScaled)
  if printSizes then
    print(weightsScaled:size(2).." "..warpedScaled:size(2).." "..weighted:size(2))
  end
  img:add(weighted)
  divisor:add(weightsScaled)
end

-- Multi-pass stylization driver.
--
-- Builds the loss network once, then runs passes `continue_with_pass` ..
-- `num_passes`. Every second pass doubles the working scale (0.125 up to 1.0),
-- passes alternate direction, and each pass starts at a different frame offset.
-- From the second pass on, each frame is initialised from the previous pass'
-- output blended with its flow-warped neighbours.
--
-- params: option table as produced by `cmd:parse`.
-- Helpers such as MaybePutOnGPU, buildNet, preprocess, runOptimization,
-- getFormatedFlowFileName, fileExists and save_image are not defined in this
-- file (presumably they come from artistic_video_core -- confirm).
local function main(params)
  -- Backend selection; GPU ids on the command line are zero-indexed.
  if params.gpu >= 0 then
    if params.backend ~= 'clnn' then
      require 'cutorch'
      require 'cunn'
      cutorch.setDevice(params.gpu + 1)
    else
      require 'clnn'
      require 'cltorch'
      cltorch.setDevice(params.gpu + 1)
    end
  else
    params.backend = 'nn'
  end

  if params.backend == 'cudnn' then
    require 'cudnn'
    if params.cudnn_autotune then
      cudnn.benchmark = true
    end
    cudnn.SpatialConvolution.accGradParameters = nn.SpatialConvolutionMM.accGradParameters -- ie: nop
  end

  local loadcaffe_backend = params.backend
  if params.backend == 'clnn' then loadcaffe_backend = 'nn' end
  local cnn = loadcaffe.load(params.proto_file, params.model_file, loadcaffe_backend):float()
  cnn = MaybePutOnGPU(cnn, params)

  local num_images = params.num_images
  if num_images == 0 then
    num_images = calcNumberOfContentImages(params)
    print("Detected " .. num_images .. " content images.")
  end
  local end_image_idx = num_images + params.start_number - 1

  local style_images_caffe = getStyleImages(params)

  -- Set up the network, inserting style and content loss modules
  local net, style_losses, losses_indices, losses_type = buildNet(cnn, params, style_images_caffe)

  -- We don't need the base CNN anymore, so clean it up to save memory.
  cnn = nil
  for i=1,#net.modules do
    local module = net.modules[i]
    if torch.type(module) == 'nn.SpatialConvolutionMM' then
      -- remove these, not used, but uses gpu memory
      module.gradWeight = nil
      module.gradBias = nil
    end
  end
  collectgarbage()

  local img = nil

  if params.seed >= 0 then
    torch.manualSeed(params.seed)
  end

  -- Scaling parameters (kept local so they do not leak into the global table).
  local scaledown = 0.125
  local scaleup = 1.0
  local scalecurrent = scaledown
  print( "Scale: " .. scaledown .. "-" .. scaleup)

  -- When resuming with -continue_with_pass, replay the doublings of the passes
  -- that are skipped so the resumed pass runs at the scale it would have had
  -- in an uninterrupted run.
  for skippedRun = 1, params.continue_with_pass - 1 do
    if skippedRun % 2 == 0 and scalecurrent < scaleup then scalecurrent = scalecurrent * 2 end
  end

  local original_size = image.load(string.format(params.content_pattern, params.start_number), 3):size()
  local randImg = torch.randn(original_size):mul(0.001)

  for run=params.continue_with_pass, params.num_passes do
    -- determine scale for current pass
    if run % 2 == 0 and scalecurrent < scaleup then scalecurrent = scalecurrent * 2 end
    local H = math.max(math.ceil(original_size[2] * scalecurrent), 1)
    local W = math.max(math.ceil(original_size[3] * scalecurrent), 1)

    -- each run should start at a different offset to smooth over the loop points
    local modStep = (run - 1) * math.ceil(end_image_idx / params.num_passes)
    -- flag == 1: frames visited in increasing order; flag == 0: decreasing order
    local flag = run % 2
    local start = (flag == 0) and modStep + end_image_idx or modStep + params.start_number
    local endp = (flag == 0) and modStep + params.start_number or modStep + end_image_idx
    local incr = (flag == 0) and -1 or 1

    -- The loop deliberately runs three frames past `endp`.
    for frameIdx = start, endp + (3 * incr), incr do
      -- Map the running index back into 1..end_image_idx.
      local modIdx = frameIdx % end_image_idx
      if modIdx == 0 then modIdx = end_image_idx end
      print("Styling frame: "..modIdx.." Scale: "..H.."x"..W)

      local fileName = string.format(params.content_pattern, modIdx)
      if not fileExists(fileName) then print("couldn't find content") end

      -- The colour-transferred copy of each content frame is generated during
      -- pass 1 only; later passes read the "-hist.png" file written then.
      if run == 1 then
        os.execute(string.format(
          '/usr/local/bin/python3 ../Neural-Tools/linear-color-transfer.py --target_image %s --source_image %s --output_image %s',
          fileName, params.style_image, fileName.."-hist.png"
        ))
      end

      local content_image = image.load(fileName.."-hist.png", 3)
      local content_image_caffe_scaled = image.scale(content_image, W, H)
      content_image_caffe_scaled = preprocess(content_image_caffe_scaled):float()
      content_image_caffe_scaled = MaybePutOnGPU(content_image_caffe_scaled, params)

      local content_losses, prevPlusFlow_losses = {}, {}
      local additional_layers = 0
      local num_iterations = params.num_iterations

      -- Previous and following frame, warped and scaled to the current size.
      -- Declared per frame so nothing carries over from an earlier iteration.
      local prevImageWarpedScaled, nextImageWarpedScaled = nil, nil
      if run ~= 1 then
        local prevImageWarped, nextImageWarped = nil, nil
        -- Choose which pass' output each neighbour is read from.
        if frameIdx == endp + incr then
          prevImageWarped = readPrevImageWarped(modIdx, params, run, false)
          nextImageWarped = readNextImageWarped(modIdx, params, run, false)
        elseif frameIdx == start then
          prevImageWarped = readPrevImageWarped(modIdx, params, run - 1, false)
          nextImageWarped = readNextImageWarped(modIdx, params, run - 1, false)
        else
          prevImageWarped = readPrevImageWarped(modIdx, params, run - (1 - flag), false)
          nextImageWarped = readNextImageWarped(modIdx, params, run - flag, false)
        end
        prevImageWarpedScaled = image.scale(prevImageWarped, W, H)
        nextImageWarpedScaled = image.scale(nextImageWarped, W, H)
      end

      -- The warped frame which will be used for temporal consistency.
      local imageWarped = nil
      if flag == 1 then imageWarped = prevImageWarpedScaled end
      if flag == 0 then imageWarped = nextImageWarpedScaled end
      local temporalLossEnabled = run >= params.use_temporalLoss_after and imageWarped ~= nil

      -- Preprocessed copy of imageWarped, created at most once per frame so
      -- several temporal loss layers do not preprocess it repeatedly.
      local imageWarpedCaffe = nil

      -- add layers for this iteration
      for i=1, #losses_indices do
        if losses_type[i] == 'content' then
          local content_loss = getContentLossModuleForLayer(net,
            losses_indices[i] + additional_layers,
            content_image_caffe_scaled, params)
          net:insert(content_loss, losses_indices[i] + additional_layers)
          additional_layers = additional_layers + 1
          table.insert(content_losses, content_loss)
        elseif temporalLossEnabled then
          if imageWarpedCaffe == nil then
            imageWarpedCaffe = preprocess(imageWarped):float()
            imageWarpedCaffe = MaybePutOnGPU(imageWarpedCaffe, params)
          end
          -- Stays nil unless this layer is of the weighted kind.
          local flowWeightsScaled = nil
          if losses_type[i] == 'prevPlusFlowWeighted' then
            local weightsFileName = nil
            if flag == 1 then
              weightsFileName = getFormatedFlowFileName(params.backwardFlow_weight_pattern, modIdx-1, modIdx)
            else
              if modIdx == end_image_idx then
                -- last frame: use the extra loop-closing weights
                weightsFileName = getFormatedFlowFileName(params.forwardFlow_weight_pattern, 1, 0)
              else
                weightsFileName = getFormatedFlowFileName(params.forwardFlow_weight_pattern, modIdx+1, modIdx)
              end
            end
            print(string.format('Reading flowWeights file "%s".', weightsFileName))
            local flowWeights = image.load(weightsFileName):float()
            flowWeights = flowWeights:expand(3, flowWeights:size(2), flowWeights:size(3))
            flowWeightsScaled = image.scale(flowWeights, W, H)
            flowWeightsScaled = MaybePutOnGPU(flowWeightsScaled, params)
          end
          local loss_module = getWeightedContentLossModuleForLayer(net,
            losses_indices[i] + additional_layers, imageWarpedCaffe,
            params, flowWeightsScaled)
          net:insert(loss_module, losses_indices[i] + additional_layers)
          table.insert(prevPlusFlow_losses, loss_module)
          additional_layers = additional_layers + 1
        end
      end

      if run == 1 then
        -- For the first run, process the frames independently
        if params.init == 'random' then
          img = image.scale(randImg:clone():float(), W, H)
        elseif params.init == 'image' then
          img = content_image_caffe_scaled:clone():float()
        elseif params.init == 'prevWarped' then
          local prevImageWarpedWithPad = readPrevImageWarped(modIdx, params, run - (1 - flag), true)
          img = preprocess(prevImageWarpedWithPad):float()
          img = image.scale(img, W, H)
        else
          print('Unknown initialization method.')
          os.exit()
        end
      else
        -- For subsequent runs, blend neighboring frames into the current frame
        local ext = paths.extname(params.output_image)
        local basename = paths.basename(params.output_image, ext)
        local fileNameBase = '%s%s-' .. params.number_format
        local imgFile = string.format(fileNameBase .. '_%d.%s', params.output_folder, basename, modIdx, run - 1, ext)
        local histFile = string.format(fileNameBase .. '_%d.%s', params.output_folder.."/hist/", basename, modIdx, run - 1, ext)
        -- Colour-match the previous pass' output to the style image first.
        os.execute(string.format(
          '/usr/local/bin/python3 ../Neural-Tools/linear-color-transfer.py --target_image %s --source_image %s --output_image %s',
          imgFile, params.style_image, histFile
        ))
        img = image.load(histFile, 3)
        img = image.scale(img, W, H)

        -- Make sure to correctly normalize the result: the divisor starts at 1
        -- (the frame itself) and collects every neighbour weight added below.
        -- It has the same 3 x H x W layout as the scaled images.
        local divisor = torch.zeros(3, H, W)
        divisor:add(1)

        if modIdx > params.start_number then
          blendWarpedNeighbor(img, divisor,
            getFormatedFlowFileName(params.backwardFlow_weight_pattern, modIdx-1, modIdx),
            prevImageWarpedScaled, params.blendWeight, W, H, false)
        else
          -- if at endpoint use extra loop flow at 0 --> 1
          blendWarpedNeighbor(img, divisor,
            getFormatedFlowFileName(params.backwardFlow_weight_pattern, 0, 1),
            prevImageWarpedScaled, params.blendWeight, W, H, true)
        end

        if modIdx < end_image_idx then
          blendWarpedNeighbor(img, divisor,
            getFormatedFlowFileName(params.forwardFlow_weight_pattern, modIdx+1, modIdx),
            nextImageWarpedScaled, params.blendWeight, W, H, false)
        else
          -- if at endpoint use extra loop flow at 1 --> 0
          blendWarpedNeighbor(img, divisor,
            getFormatedFlowFileName(params.forwardFlow_weight_pattern, 1, 0),
            nextImageWarpedScaled, params.blendWeight, W, H, true)
        end

        img:cdiv(divisor)
        img = preprocess(img):float()
      end

      img = MaybePutOnGPU(img, params)
      if params.save_init then
        save_image(img, params.output_folder .. string.format(
          'init-' .. params.number_format .. '_%d.png', modIdx, run))
      end

      -- Run the optimization for some iterations, save the result to disk.
      -- The iteration count is divided by the fourth root of the scale, so
      -- smaller scales get more iterations.
      runOptimization(params, net, content_losses, style_losses, prevPlusFlow_losses,
        img, modIdx, run, num_iterations / math.sqrt(math.sqrt(scalecurrent)) )

      -- Remove this iteration's content and temporal layers (in reverse
      -- insertion order so the stored indices stay valid).
      for i=#losses_indices, 1, -1 do
        if temporalLossEnabled or losses_type[i] == 'content' then
          additional_layers = additional_layers - 1
          net:remove(losses_indices[i] + additional_layers)
        end
      end
      assert(additional_layers == 0)
    end
  end
end
-- Warp the stylized output of the previous frame with the backward flow.
--   idx            index of the current frame
--   params         option table (flow patterns, start_number, num_images, ...)
--   run            pass whose output image of the previous frame is read
--   pad_mean_pixel if true, pixels that image.warp pads with -1 in all three
--                  channels are replaced by the VGG mean pixel
-- For idx == 1 the "previous" frame is the last frame of the sequence and the
-- extra loop flow file (indices 0, 1) is used.
-- Returns the warped image.
function readPrevImageWarped(idx, params, run, pad_mean_pixel)
  local wrapsAround = (idx == 1)

  local flowFileName = nil
  if wrapsAround then
    flowFileName = getFormatedFlowFileName(params.backwardFlow_pattern, 0, 1)
  else
    flowFileName = getFormatedFlowFileName(params.backwardFlow_pattern, idx-1, idx)
  end
  print(string.format('Reading backward flow file "%s".', flowFileName))
  local flow = flowFile.load(flowFileName)

  local prevIdx = idx - 1
  if wrapsAround then
    -- Honour an explicit -num_images (as main does); autodetect only when it
    -- is unset or 0.
    local numImages = params.num_images
    if numImages == nil or numImages == 0 then
      numImages = calcNumberOfContentImages(params)
    end
    prevIdx = numImages + params.start_number - 1
  end
  local prevFileName = build_OutFilename(params, prevIdx, run)
  print(string.format('Previous image "%s".', prevFileName))
  local prevImg = image.load(prevFileName, 3)

  local result = nil
  if pad_mean_pixel then
    local mean_pixel = torch.DoubleTensor({123.68/256.0, 116.779/256.0, 103.939/256.0})
    -- Pad with -1 so padded pixels can be recognised and replaced below.
    result = image.warp(prevImg, flow, 'bilinear', true, 'pad', -1)
    for x=1, result:size(2) do
      for y=1, result:size(3) do
        if result[1][x][y] == -1 and result[2][x][y] == -1 and result[3][x][y] == -1 then
          result[1][x][y] = mean_pixel[1]
          result[2][x][y] = mean_pixel[2]
          result[3][x][y] = mean_pixel[3]
        end
      end
    end
  else
    -- The image is first resized using the flow field's dimensions, then warped.
    result = image.warp(image.scale(prevImg, flow:size(2), flow:size(3), 'bilinear'), flow)
  end
  return result
end
-- Warp the stylized output of the following frame with the forward flow.
--   idx            index of the current frame
--   params         option table (flow patterns, start_number, num_images, ...)
--   run            pass whose output image of the following frame is read
--   pad_mean_pixel if true, pixels that image.warp pads with -1 in all three
--                  channels are replaced by the VGG mean pixel
-- For the last frame the "next" frame is frame 1 and the extra loop flow file
-- (indices 1, 0) is used.
-- Returns the warped image.
function readNextImageWarped(idx, params, run, pad_mean_pixel)
  -- Honour an explicit -num_images (as main does); autodetect only when it is
  -- unset or 0.
  local numImages = params.num_images
  if numImages == nil or numImages == 0 then
    numImages = calcNumberOfContentImages(params)
  end
  local wrapsAround = (idx == numImages + params.start_number - 1)

  local flowFileName = nil
  if wrapsAround then
    flowFileName = getFormatedFlowFileName(params.forwardFlow_pattern, 1, 0)
  else
    flowFileName = getFormatedFlowFileName(params.forwardFlow_pattern, idx+1, idx)
  end
  print(string.format('Reading forward flow file "%s".', flowFileName))
  local flow = flowFile.load(flowFileName)

  local nextIdx = wrapsAround and 1 or (idx + 1)
  local nextFileName = build_OutFilename(params, nextIdx, run)
  print(string.format('Next image "%s".', nextFileName))
  local nextImg = image.load(nextFileName, 3)

  -- `result` is local here; it previously leaked into the global table.
  local result = nil
  if pad_mean_pixel then
    local mean_pixel = torch.DoubleTensor({123.68/256.0, 116.779/256.0, 103.939/256.0})
    -- Pad with -1 so padded pixels can be recognised and replaced below.
    result = image.warp(nextImg, flow, 'bilinear', true, 'pad', -1)
    for x=1, result:size(2) do
      for y=1, result:size(3) do
        if result[1][x][y] == -1 and result[2][x][y] == -1 and result[3][x][y] == -1 then
          result[1][x][y] = mean_pixel[1]
          result[2][x][y] = mean_pixel[2]
          result[3][x][y] = mean_pixel[3]
        end
      end
    end
  else
    -- The image is first resized using the flow field's dimensions, then warped.
    result = image.warp(image.scale(nextImg, flow:size(2), flow:size(3), 'bilinear'), flow)
  end
  return result
end
-- Parse the command line. A first parse discovers the optional -args file;
-- when that file can be opened, its lines are parsed first and the real
-- command-line arguments are appended after them.
local tmpParams = cmd:parse(arg)
local params = nil
local file = nil
if tmpParams.args ~= '' then
  file = io.open(tmpParams.args, 'r')
  if file == nil then
    -- Previously ignored silently; keep the fallback but tell the user.
    io.stderr:write(string.format(
      'Warning: could not open args file "%s"; using command-line arguments only.\n',
      tmpParams.args))
  end
end

if file == nil then
  params = tmpParams
else
  local args = {}
  -- Only lines starting with '-' are treated as options; each is split into
  -- the option name and (optionally) its value.
  for line in file:lines() do
    if line:sub(1, 1) == '-' then
      local splits = str_split(line, " ", 2)
      args[#args + 1] = splits[1]
      -- A flag without a value must not leave a nil hole in the list,
      -- otherwise the length of `args` becomes unreliable.
      if splits[2] ~= nil then
        args[#args + 1] = splits[2]
      end
    end
  end
  file:close()
  for i=1, #arg do
    args[#args + 1] = arg[i]
  end
  params = cmd:parse(args)
end

main(params)
# Stylize a video as a loop.
# Usage: <this script> <video_file> <style_image>
#
# Steps: extract frames, append a copy of frame 1, compute optical flow,
# rename the flow across the loop point to the 0/1 slot, drop the copy again,
# then run the multi-pass stylization.

if [ "$#" -lt 2 ]; then
  echo "Usage: $0 <video_file> <style_image>" >&2
  exit 1
fi

# Parse arguments
video_file=$1
style_image=$2
content=$(basename "${video_file%.*}")
style=$(basename "${style_image%.*}")
# These names end up inside printf-style path patterns (frame_%d.ppm, ...),
# so a literal '%' in them is replaced by 'x'.
content=${content//[%]/x}
style=${style//[%]/x}

# Plain ASCII quotes: typographic quotes would become part of the path.
out_dir="./output"
work_dir="$out_dir/$content-$style"

# Create output folder
mkdir -p "$work_dir/frames" "$work_dir/hist"

resolution="1024:1024"
ffmpeg -i "$video_file" -vf "scale=$resolution" "$work_dir/frames/frame_%d.ppm"

# NOTE(review): with N extracted frames this yields N-1, so the copy of frame 1
# below overwrites frame N and frame N is deleted afterwards. Kept as in the
# original; confirm that dropping the final frame is intended.
LAST_FRAME=$(( $(ls -1 "$work_dir/frames" | wc -l) - 1 ))
NEXT_FRAME=$(( LAST_FRAME + 1 ))

echo "clone first frame at end"
cp "$work_dir/frames/frame_1.ppm" "$work_dir/frames/frame_${NEXT_FRAME}.ppm"

echo "calculating optic flow"
./makeOptFlow.sh "$work_dir/frames/frame_%d.ppm" "$work_dir/flow"

echo "move flow to 0 position"
# Best effort: a missing file must not abort the script.
mv "$work_dir/flow/forward_${LAST_FRAME}_${NEXT_FRAME}.flo" \
   "$work_dir/flow/forward_0_1.flo" || true
mv "$work_dir/flow/backward_${NEXT_FRAME}_${LAST_FRAME}.flo" \
   "$work_dir/flow/backward_1_0.flo" || true
mv "$work_dir/flow/reliable_${LAST_FRAME}_${NEXT_FRAME}.pgm" \
   "$work_dir/flow/reliable_0_1.pgm" || true
mv "$work_dir/flow/reliable_${NEXT_FRAME}_${LAST_FRAME}.pgm" \
   "$work_dir/flow/reliable_1_0.pgm" || true

rm "$work_dir/frames/frame_${NEXT_FRAME}.ppm"

# Perform style transfer.
# The patterns are quoted so the shell does not treat [%d] as a glob.
th artistic_video_scaling_loop.lua \
  -content_pattern "$work_dir/frames/frame_%d.ppm" \
  -forwardFlow_pattern "$work_dir/flow/forward_[%d]_{%d}.flo" \
  -backwardFlow_pattern "$work_dir/flow/backward_[%d]_{%d}.flo" \
  -forwardFlow_weight_pattern "$work_dir/flow/reliable_[%d]_{%d}.pgm" \
  -backwardFlow_weight_pattern "$work_dir/flow/reliable_[%d]_{%d}.pgm" \
  -init image \
  -content_weight 5 \
  -style_weight 5000 \
  -temporal_weight 5000 \
  -blendWeight 1 \
  -num_passes 9 \
  -continue_with_pass 1 \
  -use_temporalLoss_after 1 \
  -num_iterations 300 \
  -output_folder "$work_dir/" \
  -style_image "$style_image"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment