-
-
Save ProGamerGov/f735c1360207b420c4f920d69853e157 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Code - Trying to translate https://github.com/jcjohnson/neural-style/blob/master/neural_style.lua to PyTorch. | |
from __future__ import print_function | |
import torch | |
import torch.legacy.nn as nn | |
from torch.autograd import Variable | |
import torch.legacy.optim as optim | |
from PIL import Image | |
#from skimage import io,transform,img_as_float | |
#from skimage.io import imread,imsave | |
import torchvision | |
import torchvision.transforms as transforms | |
import torchvision.models as models | |
from torchvision.utils import save_image | |
import copy | |
import argparse | |
parser = argparse.ArgumentParser() | |
# Basic options | |
parser.add_argument("-style_image", help="Style target image", default='examples/inputs/seated-nude.jpg') | |
parser.add_argument("-content_image", help="Content target image", default='examples/inputs/tubingen.jpg') | |
parser.add_argument("-image_size", help="Maximum height / width of generated image", type=int, default=512) | |
# Optimization options | |
parser.add_argument("-content_weight", help="content weight", type=int, default=5) | |
parser.add_argument("-style_weight", help="style weight", type=int, default=10) | |
parser.add_argument("-num_iterations", help="iterations", type=int, default=1000) | |
parser.add_argument("-normalize_gradients", action='store_true') | |
parser.add_argument("-init", help="initialisation type", default="random", choices=["random", "image"]) | |
parser.add_argument("-init_image", help="initial image", default="") | |
parser.add_argument("-optimizer", help="optimiser", default="lbfgs", choices=["lbfgs", "adam"]) | |
parser.add_argument("-learning_rate", default=1) | |
parser.add_argument("-lbfgs_num_correction", help="lbfgs num correction", default=0) | |
# Output options | |
parser.add_argument("-output_image", default='out.png') | |
# Other options | |
parser.add_argument("-style_scale", help="style scale", type=float, default=1.0) | |
#parser.add_argument("-proto_file", default='models/VGG_ILSVRC_19_layers_deploy.prototxt') | |
#parser.add_argument("-model_file", default='models/VGG_ILSVRC_19_layers.caffemodel') | |
parser.add_argument("-backend", choices=["nn", "cudnn", "clnn"], default='cudnn') | |
parser.add_argument("-seed", help="random number seed", default=-1) | |
params = parser.parse_args() | |
use_cuda = torch.cuda.is_available() | |
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor | |
#cnn = loadcaffe.load(params.proto_file, params.model_file, params.backend) #.type(dtype) | |
cnn = models.vgg19(pretrained=True).features | |
loader = transforms.Compose([ | |
transforms.Scale(params.image_size), # scale imported image | |
transforms.ToTensor()]) # transform it into a torch tensor | |
def image_loader(image_name):
    """Load an image from disk and return it as a 4-D (1, C, H, W) Variable.

    Applies the module-level `loader` transform (resize + ToTensor), then
    prepends a fake batch dimension so the tensor fits the network's input.
    """
    image = Image.open(image_name)
    image = Variable(loader(image))
    # fake batch dimension required to fit network's input dimensions
    image = image.unsqueeze(0)
    return image
# Load the content and style targets and move them to the working dtype.
content_image_caffe = image_loader(params.content_image).type(dtype)
style_image_caffe = image_loader(params.style_image).type(dtype)
# move it to the GPU if possible:
if use_cuda:
    cnn = cnn.cuda()
#print(cnn)
# Layers whose activations serve as content / style targets (naming assigned in create_model).
content_layers_default = ['relu_4']
style_layers_default = ['relu_1', 'relu_2', 'relu_3', 'relu_4', 'relu_5']
def create_model(cnn, style_image_caffe, content_image_caffe,
                 style_weight=params.style_weight,
                 content_weight=params.content_weight,  # was params.style_weight (copy-paste bug)
                 content_layers=content_layers_default,
                 style_layers=style_layers_default):
    """Copy `cnn` and insert ContentLoss / StyleLoss modules after the chosen layers.

    Returns (model, style_losses, content_losses) where the two lists hold the
    inserted loss modules so the caller can read their `.loss` fields later.
    """
    cnn = copy.deepcopy(cnn)
    content_losses = []
    style_losses = []
    model = nn.Sequential()  # the new Sequential module network
    gram = GramMatrix()  # was commented out, leaving `gram` below undefined (NameError)
    # move these modules to the GPU if possible:
    if use_cuda:
        model = model.cuda()
    i = 1
    for layer in list(cnn):
        # NOTE(review): `cnn` holds torchvision (torch.nn) layers while `nn` here is
        # torch.legacy.nn, so this isinstance check likely never matches and non-ReLU
        # layers are never added — confirm which `nn` the layers actually come from.
        if isinstance(layer, nn.ReLU):
            name = "relu_" + str(i)
            model.add_module(name, layer)
            if name in content_layers:
                # add content loss:
                target = model(content_image_caffe).clone()
                content_loss = ContentLoss(target, content_weight)
                model.add_module("content_loss_" + str(i), content_loss)
                content_losses.append(content_loss)
            if name in style_layers:
                # add style loss:
                target_feature = model(style_image_caffe).clone()
                # Use the legacy-nn forward entry point; legacy modules are not callable.
                target_feature_gram = gram.updateOutput(target_feature)
                if use_cuda:  # was an unconditional .cuda(), crashing on CPU-only machines
                    target_feature_gram = target_feature_gram.cuda()
                style_loss = StyleLoss(target_feature_gram, style_weight)
                model.add_module("style_loss_" + str(i), style_loss)
                style_losses.append(style_loss)
            i += 1
    return model, style_losses, content_losses
# Define an nn Module to compute content loss in-place
class ContentLoss(nn.Module):
    """Pass-through legacy-nn module that records an MSE content loss.

    Modes: 'capture' stores the current activation as the target,
    'loss' computes strength-weighted MSE against the stored target,
    anything else is a no-op pass-through.
    """
    def __init__(self, target, strength, normalize=False):
        # `normalize` now has a default: create_model constructs this with only
        # (target, strength), which raised TypeError against the old signature.
        super(ContentLoss, self).__init__()
        self.strength = strength
        # NOTE(review): the Lua original stores the raw target; scaling by
        # strength here *and* scaling the loss below double-counts — confirm.
        self.target = target.detach() * strength
        self.normalize = normalize  # was the bare name `false` (NameError)
        self.loss = 0
        self.crit = nn.MSECriterion()
        self.mode = None

    def updateOutput(self, input):
        if self.mode == 'loss':
            self.loss = self.crit.updateOutput(input, self.target) * self.strength  # Forward
        elif self.mode == 'capture':
            self.target.resize_as_(input).copy_(input)
        self.output = input
        return self.output

    def updateGradInput(self, input, gradOutput):
        if self.mode == 'loss':
            if input.nelement() == self.target.nelement():
                self.gradInput = self.crit.updateGradInput(input, self.target)  # Backward
            if self.normalize:
                # In-place ops: the old non-inplace .div()/.add() discarded their results.
                self.gradInput.div_(torch.norm(self.gradInput, 1) + 1e-8)  # Normalize Gradients
            self.gradInput.mul_(self.strength)  # was self.gradInput_mul (AttributeError)
            self.gradInput.add_(gradOutput)
        else:
            # Pass the gradient straight through (matches StyleLoss); the old code
            # wrongly overwrote self.target with gradOutput here.
            self.gradInput = gradOutput
        return self.gradInput
class GramMatrix(nn.Module):
    """Legacy-nn module computing the C x C Gram matrix of a (C, H, W) feature map."""
    def __init__(self):
        # Dropped the unused `input` parameter: every call site constructs GramMatrix().
        super(GramMatrix, self).__init__()

    def updateOutput(self, input):
        assert input.dim() == 3
        # 0-based indices: Lua's size(1..3) maps to PyTorch size(0..2).
        C, H, W = input.size(0), input.size(1), input.size(2)
        x_flat = input.view(C, H * W)
        self.output.resize_(C, C)  # resize_ : plain .resize does not exist on tensors
        # Write into self.output; tensor.mm(a, b) is NOT in-place in PyTorch
        # (Lua's res:mm(a, b) was).
        torch.mm(x_flat, x_flat.t(), out=self.output)
        return self.output

    def updateGradInput(self, input, gradOutput):
        assert input.dim() == 3
        C, H, W = input.size(0), input.size(1), input.size(2)
        x_flat = input.view(C, H * W)
        # dL/dx_flat = (G' + G'^T) x_flat, accumulated in two in-place products.
        self.gradInput.resize_(C, H * W)
        torch.mm(gradOutput, x_flat, out=self.gradInput)
        self.gradInput.addmm_(gradOutput.t(), x_flat)
        self.gradInput = self.gradInput.view(C, H, W)
        return self.gradInput
# Define an nn Module to compute style loss in-place
class StyleLoss(nn.Module):
    """Pass-through legacy-nn module that records an MSE loss between Gram matrices.

    Modes: 'capture' stores (or blends) the current Gram matrix as the target,
    'loss' computes strength-weighted MSE of the Gram matrix against the target,
    anything else is a no-op pass-through.
    """
    def __init__(self, target, strength, normalize=False):
        # `normalize` now has a default: create_model constructs this with only
        # (target, strength), which raised TypeError against the old signature.
        super(StyleLoss, self).__init__()
        self.normalize = normalize  # was the bare name `false` (NameError)
        self.strength = strength
        # NOTE(review): scaling the target by strength *and* scaling the loss
        # below double-counts versus the Lua original — confirm.
        self.target = target.detach() * strength
        self.mode = None
        self.loss = 0
        self.gram = GramMatrix()
        self.blend_weight = None  # was the bare name `nil` (Lua leftover, NameError)
        self.G = None
        self.crit = nn.MSECriterion()

    def updateOutput(self, input):
        self.G = self.gram.updateOutput(input)  # Forward Gram
        self.G.div_(input.nelement())  # in-place; non-inplace .div discarded its result
        if self.mode == 'capture':
            if self.blend_weight is None:
                self.target.resize_as_(self.G).copy_(self.G)
            elif self.target.nelement() == 0:
                self.target.resize_as_(self.G).copy_(self.G).mul_(self.blend_weight)
            else:
                self.target.add_(self.blend_weight, self.G)
        elif self.mode == 'loss':
            # Compare Gram matrices; the old code compared raw `input` against the
            # Gram target, a shape mismatch.
            self.loss = self.strength * self.crit.updateOutput(self.G, self.target)  # Forward
        self.output = input
        return self.output

    def updateGradInput(self, input, gradOutput):
        if self.mode == 'loss':
            dG = self.crit.updateGradInput(self.G, self.target)  # Backward (typo updateGradInpu fixed)
            dG.div_(input.nelement())
            # Gram backward needs the incoming gradient dG; it was missing before.
            self.gradInput = self.gram.updateGradInput(input, dG)
            if self.normalize:
                self.gradInput.div_(torch.norm(self.gradInput, 1) + 1e-8)  # Normalize Gradients
            self.gradInput.mul_(self.strength)  # was self.gradInput_mul (AttributeError)
            self.gradInput.add_(gradOutput)
        else:
            self.gradInput = gradOutput
        return self.gradInput
# Assemble the loss-instrumented network.
model, style_losses, content_losses = create_model(
    cnn, style_image_caffe, content_image_caffe,
    params.style_weight, params.content_weight,
    content_layers_default, style_layers_default)

img = content_image_caffe.clone()

# Run it through the network once to get the proper size for the gradient
# All the gradients will come from the extra loss modules, so we just pass
# zeros into the top of the net on the backward pass.
y = model.updateOutput(img)
# Zeros shaped like the network OUTPUT (Lua: img.new(#y):zero()); the old code
# cloned `img`, which has the wrong shape whenever the net changes spatial dims.
dy = y.clone().zero_()

# Declaring this here lets us access it in maybe_print
optim_state = None
if params.optimizer == 'lbfgs':
    optim_state = {
        "maxIter": params.num_iterations,
        "verbose": True,
        "tolX": -1,
        "tolFun": -1,
    }
    if params.lbfgs_num_correction > 0:
        # optim_state is a dict: index it; attribute assignment raised AttributeError.
        optim_state["nCorrection"] = params.lbfgs_num_correction
elif params.optimizer == 'adam':
    optim_state = {
        "learningRate": params.learning_rate,
    }
# Function to evaluate loss and gradient. We run the net forward and
# backward to get the gradient, and sum up losses from the loss modules.
# optim.lbfgs internally handles iteration and calls this function many
# times, so we manually count the number of iterations to handle printing
# and saving intermediate results.
num_calls = [0]
def feval(x):
    """Return (total loss, flattened gradient) for one optimizer step on image x."""
    num_calls[0] += 1
    model.updateOutput(x)
    grad = model.updateGradInput(x, dy)
    loss = 0
    # Iterate the loss modules directly; the lists hold modules, not (n, mod) pairs.
    for mod in content_losses:
        loss = loss + mod.loss
    for mod in style_losses:
        loss = loss + mod.loss
    # optim.lbfgs expects a vector for gradients
    return loss, grad.view(grad.nelement())
print("Model Loaded")

# Capture content targets. The lists hold loss modules, so iterate them directly;
# the old code indexed the list with the module object itself (TypeError).
for content_loss in content_losses:
    content_loss.mode = 'capture'
print("Capturing content targets")
content_image_caffe = content_image_caffe.type(dtype)
model.updateOutput(content_image_caffe.type(dtype))

# Capture style targets
for content_loss in content_losses:
    content_loss.mode = None
print("Capturing style target")
for style_loss in style_losses:
    style_loss.mode = 'capture'
    # Single style image, so no blending: None makes capture copy the Gram target
    # as-is (`style_blend_weights` was an undefined Lua-port leftover).
    style_loss.blend_weight = None
model.updateOutput(style_image_caffe)

# Set all loss modules to loss mode ('loss' string; the bare name was a NameError).
for content_loss in content_losses:
    content_loss.mode = 'loss'
for style_loss in style_losses:
    style_loss.mode = 'loss'

# Initialize the image
if params.seed >= 0:
    torch.manual_seed(params.seed)  # was torch.manualSeed (the Lua API name)

# Run optimization.
if params.optimizer == 'lbfgs':
    print("Running optimization with L-BFGS")
    x, losses = optim.lbfgs(feval, img, optim_state)
elif params.optimizer == 'adam':
    print("Running optimization with ADAM")
    # range(): the old code tried to iterate the int itself.
    for t in range(params.num_iterations):
        x, losses = optim.adam(feval, img, optim_state)
print("Test CNN")
#print(model)
# Save the optimized image `x` returned by the optimizer; `output_img` was never defined.
torchvision.utils.save_image(x, params.output_image, nrow=8, padding=2, normalize=False, range=None, scale_each=False, pad_value=0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I made a script which can change the model layers: https://gist.github.com/ProGamerGov/318b5f53e5b9da1e6779c7c2baf60a29