Skip to content

Instantly share code, notes, and snippets.

@ProGamerGov
Created March 15, 2018 04:24
Show Gist options
  • Save ProGamerGov/360195eaa5ed480b5755285667a59975 to your computer and use it in GitHub Desktop.
Save ProGamerGov/360195eaa5ed480b5755285667a59975 to your computer and use it in GitHub Desktop.
import torch
import torch.legacy.nn
from torch.legacy.nn import Index as index
from torch.autograd import Variable
from PIL import Image
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.utils import save_image
import numpy as np
from torchvision.transforms import ToPILImage
import scipy.ndimage as spi
from skimage import io,transform,img_as_float
from skimage.io import imread,imsave
# Module-level flags. `requires_grad` and `volatile` are plain Python names
# here; nothing in the visible source reads them.
requires_grad = False
volatile = False
# Select the default tensor type: CUDA floats when a GPU is available,
# CPU floats otherwise.
use_cuda = torch.cuda.is_available()
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
# Preprocess an image before passing it to a Caffe model.
# We need to rescale from [0, 1] to [0, 255], convert from RGB to BGR,
# and subtract the mean pixel.
# Per-channel mean pixel, listed in BGR channel order.
MEAN_VALUE = np.array([103.939, 116.779, 123.68]) # BGR
def preproces3s(img):
# img is (channels, height, width), values are 0-255
img = img[::-1] # switch to BGR
img = img - MEAN_VALUE
img = Image.fromarray(img.astype('uint8'))
loader = transforms.Compose([transforms.ToTensor()]) # resize and convert to tensor
img = loader(img)
img = img.mul(255)
return img
# Undo the above preprocessing.
def dep3rocess(img, mean=None):
    """Undo ``preproces3s``: add the mean pixel back and return to RGB.

    Args:
        img: array-like of shape (channels, height, width) in BGR channel
            order with the mean pixel already subtracted.
        mean: optional per-channel mean in BGR order. Defaults to the
            module-level ``MEAN_VALUE``.

    Returns:
        A numpy array of shape (channels, height, width) in RGB order.
    """
    mean = MEAN_VALUE if mean is None else mean
    # Reshape to (C, 1, 1) so the mean is added per channel rather than
    # being broadcast along the width axis.
    mean = np.asarray(mean).reshape(-1, 1, 1)
    restored = np.asarray(img) + mean
    return restored[::-1]  # switch back to RGB
def preprocess(image):
    """Convert an (height, width, channels) image array to a float tensor.

    Mean-pixel subtraction and RGB->BGR conversion are deliberately not
    performed here (``deprocess`` is the matching no-op inverse).

    Args:
        image: array-like of shape (height, width, channels).

    Returns:
        A contiguous float32 torch tensor of shape (channels, height, width).

    Raises:
        ValueError: if ``image`` is not three-dimensional.
    """
    # np.array always copies, so the tensor never aliases the caller's data.
    pixels = np.array(image, dtype=np.float32)
    if pixels.ndim != 3:
        raise ValueError(
            "expected an image of shape (height, width, channels), got "
            + str(pixels.shape)
        )
    # permute() only returns a strided view; make it contiguous so later
    # .view() calls on the tensor are valid.
    return torch.from_numpy(pixels).permute(2, 0, 1).contiguous()
def deprocess(image_tensor):
    """Inverse of ``preprocess``; currently returns the tensor unchanged.

    ``preprocess`` has its mean-pixel subtraction disabled, so there is
    nothing to undo here. The previous implementation built a mean-pixel
    tensor and moved it to the GPU without ever using it, which crashed on
    machines without CUDA.

    Args:
        image_tensor: the tensor produced by the optimisation.

    Returns:
        The very same ``image_tensor`` object.
    """
    return image_tensor
#function preprocess(img)
# local mean_pixel = torch.DoubleTensor({103.939, 116.779, 123.68})
# local perm = torch.LongTensor{3, 2, 1}
# img = img:index(1, perm):mul(256.0)
# mean_pixel = mean_pixel:view(3, 1, 1):expandAs(img.size())
# img:add(-1, mean_pixel)
# return img
#end
def SaveImage(output_img, output_name):
    """Write a (channels, height, width) tensor to disk as an image.

    Args:
        output_img: tensor of shape (channels, height, width) with values
            nominally in the 0-255 range; may live on the GPU.
        output_name: destination file name passed to ``imsave``.
    """
    # Work on a CPU copy: this function is called while ``output_img`` is
    # still being optimised, so clamping it in place would silently alter
    # the optimisation variable.
    pixels = output_img.cpu().clone()
    # (C, H, W) -> (H, W, C) is the exact inverse of the permute(2, 0, 1)
    # done in preprocess(); permute(2, 1, 0) would transpose the picture.
    pixels = pixels.permute(1, 2, 0).clamp_(0, 255).round_()
    imsave(output_name, pixels.numpy().astype(np.uint8))
import torch
import torch.legacy.nn as nn
import torchvision
#from GramMatrix import GramMatrix as GramMatrix
from time import gmtime, strftime
# Define an nn Module to compute content loss in-place
class ContentLoss(nn.Module):
    """Pass-through legacy-nn module that records a content loss.

    The module forwards its input unchanged. Depending on ``mode`` it
    additionally:

    * ``'capture'`` - stores a copy of the input as the target;
    * ``'loss'``    - stores ``strength * MSE(input, target)`` in ``loss``
      and, on the backward pass, adds the loss gradient to ``gradOutput``;
    * anything else - does nothing extra.
    """

    def __init__(self, strength, normalize):
        """Create the module.

        Args:
            strength: multiplier applied to the loss and to its gradient.
            normalize: when truthy, the loss gradient is divided by its L1
                norm before being scaled by ``strength``.
        """
        super(ContentLoss, self).__init__()
        self.strength = strength
        self.target = torch.Tensor()
        # Honour the caller's flag. The previous hard-coded string 'false'
        # is truthy, so normalisation was always switched on.
        self.normalize = normalize
        self.loss = 0
        self.crit = nn.MSECriterion()
        self.mode = None

    def updateOutput(self, input):
        """Forward pass: update ``loss`` or ``target`` and return ``input``."""
        if self.mode == 'loss':
            self.loss = self.crit.updateOutput(input, self.target) * self.strength
        elif self.mode == 'capture':
            self.target.resize_as_(input).copy_(input)
        self.output = input
        return self.output

    def updateGradInput(self, input, gradOutput):
        """Backward pass: return ``gradOutput`` plus the loss gradient."""
        if self.mode == 'loss':
            if input.nelement() == self.target.nelement():
                self.gradInput = self.crit.updateGradInput(input, self.target)
            # The trailing-underscore methods modify the tensor in place;
            # the plain div/mul/add variants return a new tensor that was
            # being thrown away, leaving the gradient unscaled and dropping
            # the gradient flowing in from the layers above.
            if self.normalize:
                self.gradInput.div_(torch.norm(self.gradInput, 1) + 1e-8)
            self.gradInput.mul_(self.strength)
            self.gradInput.add_(gradOutput)
        else:
            # Outside loss mode the module is transparent: copy the incoming
            # gradient into gradInput (not into the captured target).
            self.gradInput.resize_as_(gradOutput).copy_(gradOutput)
        return self.gradInput
class GramMatrix(nn.Module):
    """Legacy-nn module computing the Gram matrix of a (C, H, W) tensor."""

    def __init__(self):
        super(GramMatrix, self).__init__()

    def updateOutput(self, input):
        """Return the (C, C) matrix ``X @ X.T`` with ``X = input.view(C, H*W)``."""
        assert input.dim() == 3
        C, H, W = input.size(0), input.size(1), input.size(2)
        x_flat = input.view(C, H * W)
        self.output = torch.mm(x_flat, x_flat.t())
        return self.output

    def updateGradInput(self, input, gradOutput):
        """Return the gradient w.r.t. ``input`` given the (C, C) ``gradOutput``.

        For ``G = X @ X.T`` the gradient is ``(gradOutput + gradOutput.T) @ X``.
        The earlier code computed the second term with the out-of-place
        ``addmm`` and discarded the result, so only half of the gradient
        was returned.
        """
        assert input.dim() == 3 and input.size(0)
        C, H, W = input.size(0), input.size(1), input.size(2)
        x_flat = input.view(C, H * W)
        grad_flat = torch.mm(gradOutput + gradOutput.t(), x_flat)
        self.gradInput = grad_flat.view(C, H, W)
        return self.gradInput
# Define an nn Module to compute style loss in-place
class StyleLoss(nn.Module):
    """Pass-through legacy-nn module that records a style (Gram) loss.

    The module forwards its input unchanged. Depending on ``mode`` it
    additionally:

    * ``'capture'`` - stores (or blends into) the target Gram matrix;
    * ``'loss'``    - stores ``strength * MSE(G, target)`` in ``loss`` and,
      on the backward pass, adds the loss gradient to ``gradOutput``;
    * anything else - does nothing extra.
    """

    def __init__(self, strength, normalize):
        """Create the module.

        Args:
            strength: multiplier applied to the loss and to its gradient.
            normalize: when truthy, the loss gradient is divided by its L1
                norm before being scaled by ``strength``.
        """
        super(StyleLoss, self).__init__()
        # Honour the caller's flag. The previous hard-coded string 'false'
        # is truthy, so normalisation was always switched on.
        self.normalize = normalize
        self.strength = strength
        self.target = torch.Tensor()
        self.mode = None
        self.loss = 0
        self.gram = GramMatrix()
        self.blend_weight = None
        self.G = None
        self.crit = nn.MSECriterion()

    def updateOutput(self, input):
        """Forward pass: update ``G``, then ``target`` or ``loss``; return ``input``."""
        self.G = self.gram.updateOutput(input)  # Forward Gram
        # In-place division: the out-of-place div() result was discarded,
        # so the Gram matrix was never normalised by the element count.
        self.G.div_(input.nelement())
        if self.mode == 'capture':
            if self.blend_weight is None:
                self.target.resize_as_(self.G).copy_(self.G)
            elif self.target.nelement() == 0:
                self.target.resize_as_(self.G).copy_(self.G).mul_(self.blend_weight)
            else:
                # Accumulate the weighted Gram matrix into the target in place.
                self.target.add_(self.G * self.blend_weight)
        elif self.mode == 'loss':
            self.loss = self.strength * self.crit.forward(self.G, self.target)
        self.output = input
        return self.output

    def updateGradInput(self, input, gradOutput):
        """Backward pass: return ``gradOutput`` plus the loss gradient."""
        if self.mode == 'loss':
            dG = self.crit.updateGradInput(self.G, self.target)  # Backward
            # Chain rule for the division by nelement() done in the forward
            # pass; required now that the forward division takes effect.
            dG.div_(input.nelement())
            self.gradInput = self.gram.updateGradInput(input, dG)  # Gram Backward
            # In-place variants: the plain div/mul/add results were being
            # discarded, leaving the gradient unscaled and dropping the
            # gradient flowing in from the layers above.
            if self.normalize:
                self.gradInput.div_(torch.norm(self.gradInput, 1) + 1e-8)
            self.gradInput.mul_(self.strength)
            self.gradInput.add_(gradOutput)
        else:
            self.gradInput = gradOutput
        return self.gradInput
import torch
import torch.nn as nn
import torch.legacy.nn as lnn
from torch.legacy.nn import SpatialConvolution
from torch.legacy.nn import SpatialMaxPooling
from torch.legacy.nn import ReLU
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import copy
from time import gmtime, strftime
from LossModules import ContentLoss
from LossModules import StyleLoss
from LossModules import GramMatrix
# Module-level flags. `requires_grad` and `volatile` are plain Python names
# here; nothing in the visible source reads them.
requires_grad = False
volatile = False
# Select the default tensor type: CUDA floats when a GPU is available,
# CPU floats otherwise.
use_cuda = torch.cuda.is_available()
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
# Layer names, in network order, used to label the modules of a loaded model
# by position. ModelSetup indexes VGG19_Layer_List with the running module
# index; the VGG16 and NIN tables are not read by the visible code (the NIN
# lookup in ModelSetup is commented out).
VGG19_Layer_List = ['conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2', 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3', 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4', 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4', 'pool5', 'torch_view', 'fc6', 'relu6', 'drop6', 'fc7', 'relu7', 'drop7', 'fc8', 'prob']
VGG16_layer_List = ['conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2', 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'pool3', 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'pool4', 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'pool5', 'torch_view', 'fc6', 'relu6', 'drop6', 'fc7', 'relu7', 'drop7', 'fc8', 'prob']
NIN_Layer_List = ['conv1', 'relu0', 'cccp1', 'relu1', 'cccp2', 'relu2', 'pool0', 'conv2', 'relu3', 'cccp3', 'relu5', 'cccp4', 'relu6', 'pool2', 'conv3', 'relu7', 'cccp5', 'relu8', 'cccp6', 'relu9', 'pool3', 'drop', 'conv4-1024', 'relu10', 'cccp7-1024', 'relu11', 'cccp8-1024', 'relu12', 'pool4', 'loss']
def ModelSetup(cnn, style_weight, content_weight, content_layers, style_layers, normalize_gradients):
    """Build a loss-instrumented copy of ``cnn``.

    Modules of ``cnn`` are labelled by position using ``VGG19_Layer_List``.
    Convolution, ReLU and pooling modules are appended to a new
    ``lnn.Sequential``; after every conv/relu module whose name appears in
    ``content_layers`` / ``style_layers`` a ``ContentLoss`` / ``StyleLoss``
    module is inserted.

    Construction stops once every requested layer has received its loss
    module. Layers behind the last loss module only ever receive the zero
    gradient passed in from the top, so they cost time and memory without
    affecting the result.

    Args:
        cnn: loaded legacy-nn model exposing an iterable ``modules``.
        style_weight: strength handed to every ``StyleLoss``.
        content_weight: strength handed to every ``ContentLoss``.
        content_layers: names of the layers that get a content loss.
        style_layers: names of the layers that get a style loss.
        normalize_gradients: forwarded to the loss modules.

    Returns:
        Tuple ``(net, style_losses, content_losses)``.
    """
    content_losses = []
    style_losses = []
    net = lnn.Sequential()
    # Names still waiting for a loss module; empty means we can stop.
    pending = set(content_layers) | set(style_layers)

    def attach_losses(layer_name, position):
        """Append the loss modules requested for ``layer_name`` to ``net``."""
        if layer_name in content_layers:
            print("Setting up content layer " + str(position) + ": " + str(layer_name))
            loss_module = ContentLoss(content_weight, normalize_gradients)
            net.add(loss_module)
            content_losses.append(loss_module)
        if layer_name in style_layers:
            print("Setting up style layer " + str(position) + ": " + str(layer_name))
            loss_module = StyleLoss(style_weight, normalize_gradients)
            net.add(loss_module)
            style_losses.append(loss_module)
        pending.discard(layer_name)

    # zip() stops at the shorter sequence, so a model with more modules
    # than known layer names no longer raises IndexError.
    for position, (layer, layer_name) in enumerate(zip(cnn.modules, VGG19_Layer_List)):
        if not pending:
            break
        if "conv" in layer_name or "relu" in layer_name:
            net.add(layer)
            attach_losses(layer_name, position)
        elif "pool" in layer_name:
            net.add(layer)

    # Only move to the GPU when one exists instead of crashing on CPU hosts.
    if torch.cuda.is_available():
        net = net.cuda()
    return net, style_losses, content_losses
# Code - Trying to translate https://github.com/jcjohnson/neural-style/blob/master/neural_style.lua to PyTorch.
from __future__ import print_function
import os
import sys
from time import gmtime, strftime
import torch
import torch.legacy.nn as nn
import torch.nn as nn2
from torch.autograd import Variable
import torch.legacy.optim as optim
from PIL import Image
from torch.legacy.nn import SpatialConvolution
from torch.legacy.nn import SpatialMaxPooling
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.utils import save_image
from torch.utils.serialization import load_lua
import copy
requires_grad = False
volatile = False
import argparse

# Command-line interface. Every numeric option declares an explicit ``type``
# so values given on the command line arrive as numbers rather than strings
# (the defaults are unchanged).
parser = argparse.ArgumentParser()
# Basic options
parser.add_argument("-style_image", help="Style target image", default='examples/inputs/seated-nude.jpg')
parser.add_argument("-content_image", help="Content target image", default='examples/inputs/tubingen.jpg')
parser.add_argument("-image_size", help="Maximum height / width of generated image", type=int, default=512)
# Optimization options
# Weights are parsed as floats so fractional values such as 0.5 or 5e0 work;
# integer input is still accepted.
parser.add_argument("-content_weight", help="content weight", type=float, default=5)
parser.add_argument("-style_weight", help="style weight", type=float, default=10)
parser.add_argument("-num_iterations", help="iterations", type=int, default=1000)
parser.add_argument("-normalize_gradients", action='store_true')
parser.add_argument("-init", help="initialisation type", default="random", choices=["random", "image"])
parser.add_argument("-init_image", help="initial image", default="")
parser.add_argument("-optimizer", help="optimiser", default="lbfgs", choices=["lbfgs", "adam"])
parser.add_argument("-learning_rate", type=float, default=1)
parser.add_argument("-lbfgs_num_correction", help="lbfgs num correction", type=int, default=0)
# Output options
parser.add_argument("-print_iter", type=int, default=50)
parser.add_argument("-save_iter", type=int, default=100)
parser.add_argument("-output_image", default='out.png')
# Other options
parser.add_argument("-style_scale", help="style scale", type=float, default=1.0)
#parser.add_argument("-proto_file", default='models/VGG_ILSVRC_19_layers_deploy.prototxt')
#parser.add_argument("-model_file", default='models/VGG_ILSVRC_19_layers.caffemodel')
parser.add_argument("-backend", choices=["nn", "cudnn", "clnn"], default='cudnn')
parser.add_argument("-seed", help="random number seed", type=int, default=-1)
params = parser.parse_args()
# Print the start time (UTC, HH:MM:SS) so run duration can be judged from the log.
time = strftime("%H:%M:%S", gmtime())
print(time)
# Select the default tensor type: CUDA floats when a GPU is available,
# CPU floats otherwise.
use_cuda = torch.cuda.is_available()
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
# Load the network from a Torch7 serialised file in the working directory.
#cnn = models.vgg19(pretrained=True)#.features
cnn = load_lua('vgg19.t7')
#cnn = load_lua('nin_imagenet.t7')
from ModelSetup import ModelSetup
from ImageSetup import SaveImage
from ImageSetup import preprocess
from ImageSetup import deprocess
import scipy.ndimage as spi
from skimage import io,transform,img_as_float
from skimage.io import imread,imsave
from scipy.misc import imresize
#if use_cuda:
# cnn = cnn.cuda()
# Target size of the working images. Derived from -image_size so the option
# is honoured; with the default of 512 this is the (512, 512) that used to
# be hard-coded.
image_size = (params.image_size, params.image_size)

# Load the content image as RGB, resize it and convert it to a
# (channels, height, width) tensor on the GPU.
content_image = spi.imread(params.content_image, mode="RGB").astype(float)
content_image = imresize(content_image, image_size, interp='bilinear', mode='RGB')
content_image_caffe = preprocess(content_image).cuda()

# Same pipeline for the style image.
style_image = spi.imread(params.style_image, mode="RGB").astype(float)
style_image = imresize(style_image, image_size, interp='bilinear')
style_image_caffe = preprocess(style_image).cuda()

# Layers that receive loss modules. A fuller style set that was tried is
# ['relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1'].
content_layers_default = ['relu4_2']
style_layers_default = ['relu1_1', 'relu2_1', 'relu3_1']
def create_model(cnn, style_weight=params.style_weight, content_weight=params.content_weight, content_layers=content_layers_default, style_layers=style_layers_default, normalize_gradients=params.normalize_gradients):
    """Build the loss-instrumented network via ``ModelSetup``.

    The arguments are forwarded as given. Previously they were ignored and
    the style weight was passed to ``ModelSetup`` as the content weight, so
    ``-content_weight`` had no effect.

    Args:
        cnn: the loaded base network.
        style_weight: strength of the style losses.
        content_weight: strength of the content losses.
        content_layers: names of the layers that get a content loss.
        style_layers: names of the layers that get a style loss.
        normalize_gradients: forwarded to the loss modules.

    Returns:
        Tuple ``(net, style_losses, content_losses)``.
    """
    return ModelSetup(
        cnn,
        style_weight=style_weight,
        content_weight=content_weight,
        content_layers=content_layers,
        style_layers=style_layers,
        normalize_gradients=normalize_gradients,
    )
net, style_losses, content_losses = create_model(cnn, params.style_weight, params.content_weight, content_layers_default, style_layers_default)

# Capture content targets: one forward pass of the content image with the
# content loss modules recording their input.
for i in content_losses:
    i.mode = 'capture'
print("Capturing content targets")
net.updateOutput(content_image_caffe)
print("Content Loss Captured")

# Capture style targets: switch the content modules off, then run the style
# image through with the style loss modules recording their Gram matrices.
for i in content_losses:
    i.mode = None
print("Capturing style target")
for j in style_losses:
    j.mode = 'capture'
    #j.blend_weight = style_blend_weights[i]
net.updateOutput(style_image_caffe.cuda())

# Set all loss modules to loss mode
for i in content_losses:
    i.mode = 'loss'
for i in style_losses:
    i.mode = 'loss'

# Seed the random number generator when a non-negative seed was requested.
# int() guards against the option arriving as a string; torch.manual_seed
# is the PyTorch spelling (torch.manualSeed is the Lua name and does not
# exist in Python).
seed = int(params.seed)
if seed >= 0:
    torch.manual_seed(seed)
def maybe_print(t, loss):
    """Print per-layer and total losses every ``params.print_iter`` calls.

    Nothing is printed when ``params.print_iter`` is not positive or ``t``
    is not a multiple of it.

    Args:
        t: 1-based count of loss evaluations so far.
        loss: total loss of the current evaluation.
    """
    if params.print_iter <= 0 or t % params.print_iter != 0:
        return
    print("Iteration: " + str(t) + " / "+ str(params.num_iterations))
    # Label each line with the 1-based position of the loss module; printing
    # the module object itself gave every line the same, uninformative repr.
    for index, loss_module in enumerate(content_losses, 1):
        print(" Content: " + str(index) + " loss: "+ str(loss_module.loss))
    for index, loss_module in enumerate(style_losses, 1):
        print(" Style: " + str(index) + " loss: "+ str(loss_module.loss))
    print(" Total loss " + str(loss))
def maybe_save(t):
    """Save the current image on checkpoint iterations and on the last one.

    A checkpoint is every ``params.save_iter``-th call (when that option is
    positive) and is written to ``<name>_<t><ext>``; the call with
    ``t == params.num_iterations`` is written to ``params.output_image``
    itself.

    Args:
        t: 1-based count of loss evaluations so far.
    """
    is_checkpoint = params.save_iter > 0 and t % params.save_iter == 0
    is_final = t == params.num_iterations
    if not (is_checkpoint or is_final):
        return
    stem, extension = os.path.splitext(params.output_image)
    if is_final:
        target = stem + str(extension)
    else:
        target = str(stem) + "_" + str(t) + str(extension)
    SaveImage(img, target)
# The original model is no longer needed once the loss network is built.
cnn = None

# Run it through the network once to get the proper size for the gradient
# All the gradients will come from the extra loss modules, so we just pass
# zeros into the top of the net on the backward pass.
img = content_image_caffe.clone()
print("Y Net Forward")
y = net.forward(img.cuda())
# Zero a *copy* of the output. y.zero_() would wipe the network's own output
# tensor in place and leave dy aliased to it.
dy = y.clone().zero_()

# Declaring this here lets us access it in maybe_print
optim_state = None
if params.optimizer == 'lbfgs':
    optim_state = {
        "maxIter": params.num_iterations,
        "verbose": True,
        "tolX": -1,
        "tolFun": -1,
    }
    # int() guards against the option arriving as a string; the state is a
    # dict, so the entry must be set by key (attribute assignment raises
    # AttributeError).
    num_correction = int(params.lbfgs_num_correction)
    if num_correction > 0:
        optim_state["nCorrection"] = num_correction
elif params.optimizer == 'adam':
    optim_state = {
        "learningRate": float(params.learning_rate),
    }

# Function to evaluate loss and gradient. We run the net forward and
# backward to get the gradient, and sum up losses from the loss modules.
# optim.lbfgs internally handles iteration and calls this function many
# times, so we manually count the number of iterations to handle printing
# and saving intermediate results.
num_calls = [0]
def feval(x):
    """Evaluate the total loss and its gradient at ``x``.

    Runs the network forward and backward, adds up the losses stored on the
    content and style loss modules, and triggers periodic printing/saving.

    Args:
        x: the image tensor currently being optimised.

    Returns:
        Tuple ``(loss, grad)`` where ``grad`` is flattened to one dimension.
    """
    num_calls[0] += 1
    evaluation = num_calls[0]

    net.updateOutput(x.cuda())
    grad = net.updateGradInput(x.cuda(), dy.cuda())

    # Content losses are accumulated first, then the style losses continue
    # from that subtotal.
    content_total = sum(module.loss for module in content_losses)
    loss = sum((module.loss for module in style_losses), content_total)

    maybe_print(evaluation, loss)
    maybe_save(evaluation)

    # optim.lbfgs expects a vector for gradients
    flat_grad = grad.view(grad.nelement())
    return loss, flat_grad
# Run optimization.
if params.optimizer == 'lbfgs':
    # L-BFGS iterates internally and calls feval as often as it needs.
    print("Running optimization with L-BFGS")
    x, losses = optim.lbfgs(feval, img, optim_state)
elif params.optimizer == 'adam':
    print("Running optimization with ADAM")
    # range() exists on both Python 2 and 3; xrange raises NameError on
    # Python 3.
    for t in range(params.num_iterations):
        x, losses = optim.adam(feval, img, optim_state)
#save_image(output_img, params.output_image)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment