ProGamerGov/LossModules.py Secret

## LossModules.py
import torch
import torch.nn as nn
import torch.legacy.nn as lnn
import torchvision
from torch.autograd import Variable

# Define an nn Module to compute content loss in-place
class ContentLoss(nn.Module):
    """Pass-through layer that records a content target and measures loss.

    ``forward`` always returns its input unchanged; ``self.mode`` selects the
    side effect:

    * ``'capture'``: store a detached copy of the input in ``self.target``.
    * ``'loss'``: set ``self.loss = strength * MSE(input, self.target)``.
    * anything else: no side effect.

    Args:
        strength: Weight applied to the loss and to the manual gradient.
        normalize: ``True`` or the string ``'True'`` (any case) enables L1
            normalization of the gradient computed by ``backward``.
    """

    def __init__(self, strength, normalize):
        super(ContentLoss, self).__init__()
        self.strength = strength
        self.target = torch.Tensor()
        self.gradInput = torch.Tensor()
        self.normalize = normalize
        self.loss = 0
        self.crit = nn.MSELoss()
        self.mode = None

    def _normalize_enabled(self):
        """Return True when gradient normalization was requested."""
        return str(self.normalize).lower() == 'true'

    def forward(self, input):
        """Update target or loss according to ``self.mode``; return ``input``."""
        if self.mode == 'loss':
            # type_as moves the target to the input's device/dtype, so nothing
            # here depends on CUDA being available.
            target = self.target.type_as(input)
            self.loss = self.crit(input, target) * self.strength
        elif self.mode == 'capture':
            self.target = input.detach().clone()
        self.output = input
        return self.output

    def backward(self, input, gradOutput):
        """Manual backward pass that does not rely on autograd.

        In ``'loss'`` mode the result is
        ``strength * d MSE(input, target) / d input + gradOutput`` (the MSE
        gradient is L1-normalized first when normalization is enabled). If
        ``input`` and the target differ in element count the loss term is
        skipped. In every other mode ``gradOutput`` is passed through and the
        captured target is left untouched.

        Returns:
            The tensor also stored in ``self.gradInput``.
        """
        upstream = gradOutput.detach()
        count = input.nelement()
        if self.mode == 'loss' and count > 0 and count == self.target.nelement():
            features = input.detach()
            target = self.target.type_as(features).view_as(features)
            # d/dx mean((x - t)^2) = 2 * (x - t) / N
            grad = (features - target) * (2.0 / count)
            if self._normalize_enabled():
                grad = grad / (grad.abs().sum() + 1e-8)
            self.gradInput = grad * self.strength + upstream.type_as(grad).view_as(grad)
        else:
            self.gradInput = upstream.clone()
        return self.gradInput


class GramMatrix(nn.Module):
    """Compute the unnormalized Gram matrix ``X @ X.T`` of a feature map.

    ``X`` is the input viewed as ``(rows, H * W)`` where ``H`` and ``W`` are
    the last two dimensions and ``rows`` is the product of the leading ones.
    Both ``(C, H, W)`` and ``(B, C, H, W)`` inputs are accepted.
    """

    def __init__(self):
        super(GramMatrix, self).__init__()
        self.output = torch.Tensor()
        self.gradInput = torch.Tensor()

    @staticmethod
    def _flatten(input):
        """View ``input`` as a 2-D matrix with one row per feature map."""
        if input.dim() not in (3, 4):
            raise ValueError(
                "GramMatrix expects a 3-D or 4-D tensor, got " + str(input.dim()) + "-D")
        return input.contiguous().view(-1, input.size(-2) * input.size(-1))

    def forward(self, input):
        """Return the Gram matrix of ``input`` (also stored in ``self.output``)."""
        x_flat = self._flatten(input)
        self.output = torch.mm(x_flat, x_flat.t())
        return self.output

    def backward(self, input, gradOutput):
        """Return the gradient w.r.t. ``input`` given the gradient w.r.t. the Gram matrix.

        Since ``G = X @ X.T``, the gradient is
        ``gradOutput @ X + gradOutput.T @ X``, reshaped like ``input``.
        The result is also stored in ``self.gradInput``.
        """
        x_flat = self._flatten(input)
        grad = torch.mm(gradOutput + gradOutput.t(), x_flat)
        self.gradInput = grad.view_as(input)
        return self.gradInput


# Define an nn Module to compute style loss in-place
class StyleLoss(nn.Module):
    """Pass-through layer that records a style target and measures loss.

    The style statistic is the Gram matrix produced by ``self.gram`` divided
    by the number of elements of the input. ``self.mode`` selects the side
    effect of ``forward``:

    * ``'capture'``: store the statistic in ``self.target``. With
      ``blend_weight`` set, successive captures accumulate
      ``blend_weight * statistic``.
    * ``'loss'``: set ``self.loss = strength * MSE(statistic, self.target)``.
    * anything else: no side effect.

    Args:
        strength: Weight applied to the loss and to the manual gradient.
        normalize: ``True`` or the string ``'True'`` (any case) enables L1
            normalization of the gradient computed by ``backward``.
    """

    def __init__(self, strength, normalize):
        super(StyleLoss, self).__init__()
        self.normalize = normalize
        self.strength = strength
        self.target = torch.Tensor()
        self.gradInput = torch.Tensor()
        self.mode = None
        self.loss = 0
        self.gram = GramMatrix()
        self.blend_weight = None
        self.G = None
        self.crit = nn.MSELoss()

    def _normalize_enabled(self):
        """Return True when gradient normalization was requested."""
        return str(self.normalize).lower() == 'true'

    def _statistic(self, features):
        """Return the Gram matrix of ``features`` divided by its element count."""
        return self.gram(features) / features.nelement()

    def forward(self, input):
        """Update target or loss according to ``self.mode``; return a copy of ``input``."""
        # Computed from ``input`` itself (not ``input.data``) so that the loss
        # stays connected to the autograd graph.
        self.G = self._statistic(input)
        if self.mode == 'capture':
            captured = self.G.detach()
            if self.blend_weight is None:
                self.target = captured.clone()
            elif self.target.nelement() == 0:
                self.target = captured * self.blend_weight
            else:
                self.target = self.target.type_as(captured) + captured * self.blend_weight
        elif self.mode == 'loss':
            # type_as moves the target to the statistic's device/dtype.
            self.loss = self.strength * self.crit(self.G, self.target.type_as(self.G))
        self.output = input.clone()
        return self.output

    def backward(self, input, gradOutput):
        """Manual backward pass.

        In ``'loss'`` mode the result is
        ``strength * d MSE(statistic(input), target) / d input + gradOutput``
        (the MSE gradient is L1-normalized first when normalization is
        enabled). The derivative is obtained by differentiating through
        ``self.gram`` on a detached copy of ``input``, so no gradient is
        accumulated on any existing tensor. In every other mode ``gradOutput``
        is passed through.

        Returns:
            The tensor also stored in ``self.gradInput``.
        """
        if self.mode != 'loss':
            self.gradInput = gradOutput
            return self.gradInput
        with torch.enable_grad():
            features = input.detach().clone().requires_grad_(True)
            statistic = self._statistic(features)
            raw_loss = self.crit(statistic, self.target.type_as(statistic))
            grad = torch.autograd.grad(raw_loss, features)[0]
        if self._normalize_enabled():
            grad = grad / (grad.abs().sum() + 1e-8)
        self.gradInput = grad * self.strength + gradOutput.detach().type_as(grad)
        return self.gradInput

## LossModules2.py
import torch
import torch.nn as nn
#import torch.legacy.nn as lnn
import torchvision
from torch.autograd import Variable


class ContentLoss(nn.Module):
    """Pass-through content-loss layer whose gradient comes from autograd.

    ``forward`` returns its input unchanged; ``self.mode`` selects the side
    effect:

    * ``'capture'``: store a detached copy of the input in ``self.target``.
    * ``'loss'``: set ``self.loss = strength * MSE(input, self.target)``.
    * anything else: no side effect.

    Args:
        strength: Weight applied to the loss.
        normalize: ``True`` or the string ``'True'`` (any case) makes
            ``backward`` L1-normalize this layer's gradient.
    """

    def __init__(self, strength, normalize):
        super(ContentLoss, self).__init__()
        # The target is stored detached: it is a fixed value, not something
        # gradients should flow into.
        self.target = torch.Tensor()
        self.strength = strength
        self.criterion = nn.MSELoss()
        self.mode = None
        self.normalize = normalize
        self.loss = 0

    def _normalize_enabled(self):
        """Return True when gradient normalization was requested."""
        return str(self.normalize).lower() == 'true'

    def forward(self, input):
        """Update target or loss according to ``self.mode``; return ``input``."""
        if self.mode == 'loss':
            # type_as moves the target to the input's device/dtype.
            self.loss = self.criterion(input, self.target.type_as(input)) * self.strength
        elif self.mode == 'capture':
            self.target = input.detach().clone()
        self.output = input
        return self.output

    def backward(self, input, gradOutput, retain_graph=True):
        """Backpropagate the loss stored by the last ``forward`` exactly once.

        ``input`` and ``gradOutput`` are accepted for interface compatibility
        and are not used. Outside ``'loss'`` mode nothing is backpropagated
        and the captured target is left untouched.

        With normalization enabled, the gradient of the loss with respect to
        the tensor seen by ``forward`` is divided by its L1 norm, scaled by
        ``strength`` and then propagated further back from that tensor.

        Returns:
            ``self.loss``.
        """
        if self.mode != 'loss':
            return self.loss
        if self._normalize_enabled():
            # self.output is the very tensor the loss was computed from.
            grad = torch.autograd.grad(self.loss, self.output, retain_graph=True)[0]
            grad = grad / (grad.abs().sum() + 1e-8) * self.strength
            self.output.backward(grad, retain_graph=retain_graph)
        else:
            self.loss.backward(retain_graph=retain_graph)
        return self.loss

class GramMatrix(nn.Module):
    """Gram matrix of a 4-D feature map, divided by its element count."""

    def forward(self, input):
        """Return ``F @ F.T / (a * b * c * d)`` for an ``(a, b, c, d)`` input.

        ``F`` is the input viewed as ``(a * b, c * d)``: one row per feature
        map, one column per spatial position.
        """
        a, b, c, d = input.size()
        features = input.view(a * b, c * d)
        G = torch.mm(features, features.t())
        # Normalize by the total number of elements in the feature maps.
        return G.div(a * b * c * d)

    def backward(self, input, gradOutput):
        """Return the gradient of ``forward`` w.r.t. the flattened features.

        For ``G = F @ F.T / n`` the gradient is
        ``(gradOutput + gradOutput.T) @ F / n``. The result keeps the
        flattened ``(a * b, c * d)`` shape.
        """
        a, b, c, d = input.size()
        features = input.view(a * b, c * d)
        grad = torch.mm(gradOutput + gradOutput.t(), features)
        return grad.div(a * b * c * d)


class StyleLoss(nn.Module):
    """Pass-through style-loss layer whose gradient comes from autograd.

    The style statistic is whatever ``self.gram`` returns for the input.
    ``self.mode`` selects the side effect of ``forward``:

    * ``'capture'``: store the statistic in ``self.target``. With
      ``blend_weight`` set, successive captures accumulate
      ``blend_weight * statistic``.
    * ``'loss'``: set ``self.loss = strength * MSE(statistic, self.target)``.
    * anything else: no side effect.

    Args:
        strength: Weight applied to the loss.
        normalize: ``True`` or the string ``'True'`` (any case) makes
            ``backward`` L1-normalize this layer's gradient.
    """

    def __init__(self, strength, normalize):
        super(StyleLoss, self).__init__()
        self.target = torch.Tensor()
        self.strength = strength
        self.gram = GramMatrix()
        self.criterion = nn.MSELoss()
        self.mode = None
        self.blend_weight = None
        self.G = None
        self.normalize = normalize
        self.loss = 0
        # Tensor the current loss was computed from; needed by backward().
        self._loss_input = None

    def _normalize_enabled(self):
        """Return True when gradient normalization was requested."""
        return str(self.normalize).lower() == 'true'

    def forward(self, input):
        """Update target or loss according to ``self.mode``; return a copy of ``input``."""
        self.output = input.clone()
        self.G = self.gram(input)
        if self.mode == 'capture':
            captured = self.G.detach()
            if self.blend_weight is None:
                self.target = captured.clone()
            elif self.target.nelement() == 0:
                self.target = captured * self.blend_weight
            else:
                self.target = self.target.type_as(captured) + captured * self.blend_weight
        elif self.mode == 'loss':
            self._loss_input = input
            # type_as moves the target to the statistic's device/dtype.
            self.loss = self.strength * self.criterion(self.G, self.target.type_as(self.G))
        return self.output

    def backward(self, input, gradOutput, retain_graph=True):
        """Backpropagate the loss stored by the last ``forward`` exactly once.

        ``input`` is accepted for interface compatibility and is not used.
        Outside ``'loss'`` mode nothing is backpropagated and ``gradOutput``
        is returned unchanged, without overwriting ``self.loss``.

        With normalization enabled, the gradient of the loss with respect to
        the tensor seen by ``forward`` is divided by its L1 norm, scaled by
        ``strength`` and then propagated further back from that tensor.

        Returns:
            ``self.loss`` in ``'loss'`` mode, otherwise ``gradOutput``.
        """
        if self.mode != 'loss':
            return gradOutput
        if self._normalize_enabled():
            grad = torch.autograd.grad(self.loss, self._loss_input, retain_graph=True)[0]
            grad = grad / (grad.abs().sum() + 1e-8) * self.strength
            self._loss_input.backward(grad, retain_graph=retain_graph)
        else:
            self.loss.backward(retain_graph=retain_graph)
        return self.loss

## st.py
import torch
import torch.nn as nn
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from torch.autograd import Variable, Function
import torch.optim as optim

from PIL import Image
import os
import sys
from LossModules import ContentLoss
from LossModules import StyleLoss
from LossModules import GramMatrix

import argparse
parser = argparse.ArgumentParser()
# Basic options
parser.add_argument("-style_image", help="Style target image", default='examples/inputs/seated-nude.jpg')
parser.add_argument("-content_image", help="Content target image", default='examples/inputs/tubingen.jpg')
parser.add_argument("-image_size", help="Maximum height / width of generated image", type=int, default=512)
# Optimization options
parser.add_argument("-num_iterations", help="iterations", type=int, default=1000)
parser.add_argument("-optimizer", help="optimiser", default="lbfgs", choices=["lbfgs", "adam"])
# Output options
parser.add_argument("-print_iter", type=int, default=50)
parser.add_argument("-save_iter", type=int, default=100)
parser.add_argument("-output_image", default='out.png')
params = parser.parse_args()

use_cuda = torch.cuda.is_available()
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor


def ImageSetup(image_name, image_size):
    """Load an image file as an RGB tensor with a leading batch dimension.

    The file is opened with PIL, converted to RGB, passed through
    ``Resize(image_size)`` and ``ToTensor()``, wrapped in a ``Variable`` and
    given an extra dimension at position 0. The resulting size is printed.
    """
    rgb = Image.open(image_name).convert('RGB')
    # resize and convert to tensor
    pipeline = transforms.Compose([transforms.Resize(image_size), transforms.ToTensor()])
    batch = Variable(pipeline(rgb)).unsqueeze(0)
    print(batch.size())
    return batch

def SaveImage(output_img, output_name):
   """Write ``output_img`` to the file ``output_name``.

   Only the tensor and the file name are passed, so the library defaults
   apply. The keyword ``range`` is deliberately not passed: newer
   torchvision releases reject it.
   """
   torchvision.utils.save_image(output_img, output_name)


# Load both images; ``dtype`` is the CUDA float type only when a GPU is
# available, so this also runs on CPU-only machines.
content_image = ImageSetup(params.content_image, params.image_size).type(dtype)
style_image = ImageSetup(params.style_image, params.image_size).type(dtype)


# Separate names for layers
VGG19_Layer_List = ['conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2', 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3', 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4', 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4', 'pool5', 'torch_view', 'fc6', 'relu6', 'drop6', 'fc7', 'relu7', 'drop7', 'fc8', 'prob']
VGG16_Layer_List = ['conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2', 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'pool3', 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'pool4', 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'pool5', 'torch_view', 'fc6', 'relu6', 'drop6', 'fc7', 'relu7', 'drop7', 'fc8', 'prob']
# The table is available under both spellings: the capitalised one matches
# the sibling tables, the lower-case one is the historical name.
VGG16_layer_List = VGG16_Layer_List
NIN_Layer_List = ['conv1', 'relu0', 'cccp1', 'relu1', 'cccp2', 'relu2', 'pool0', 'conv2', 'relu3', 'cccp3', 'relu5', 'cccp4', 'relu6', 'pool2', 'conv3', 'relu7', 'cccp5', 'relu8', 'cccp6', 'relu9', 'pool3', 'drop', 'conv4-1024', 'relu10', 'cccp7-1024', 'relu11', 'cccp8-1024', 'relu12', 'pool4', 'loss']


def ModelSetup(cnn, style_weight, content_weight, Layer_List, content_layers, style_layers, normalize_gradients):
    """Build the style-transfer network from a feature extractor.

    Layers of ``cnn`` are paired by position with the names in
    ``Layer_List``. Layers whose name contains ``conv``, ``relu`` or ``pool``
    are copied into a new ``nn.Sequential`` under that name; all others are
    skipped. After every conv/relu layer named in ``content_layers`` /
    ``style_layers`` a ``ContentLoss`` / ``StyleLoss`` module is inserted.

    Loss modules are registered as ``content_loss_<layer>`` and
    ``style_loss_<layer>``: ``add_module`` replaces any existing entry with
    the same name, so reusing the layer's own name would drop the layer.

    Args:
        cnn: Iterable of layers, in execution order.
        style_weight: Strength passed to every ``StyleLoss``.
        content_weight: Strength passed to every ``ContentLoss``.
        Layer_List: Names for the layers of ``cnn``, by position.
        content_layers: Names of layers to follow with a content loss.
        style_layers: Names of layers to follow with a style loss.
        normalize_gradients: Forwarded to the loss modules as ``normalize``.

    Returns:
        Tuple ``(net, style_losses, content_losses)``.

    Raises:
        IndexError: If ``cnn`` has more layers than ``Layer_List`` has names.
    """
    content_losses = []
    style_losses = []
    net = nn.Sequential()

    for index, layer in enumerate(cnn):
        layer_name = Layer_List[index]
        takes_loss = "conv" in layer_name or "relu" in layer_name
        if not takes_loss and "pool" not in layer_name:
            continue

        net.add_module(layer_name, layer)
        if not takes_loss:
            continue

        if layer_name in content_layers:
            print("Setting up content layer " + str(len(content_losses) + 1) + ": " + str(layer_name))
            loss_module = ContentLoss(content_weight, normalize_gradients)
            net.add_module("content_loss_" + layer_name, loss_module)
            content_losses.append(loss_module)
        if layer_name in style_layers:
            print("Setting up style layer " + str(len(style_losses) + 1) + ": " + str(layer_name))
            loss_module = StyleLoss(style_weight, normalize_gradients)
            net.add_module("style_loss_" + layer_name, loss_module)
            style_losses.append(loss_module)

    return net, style_losses, content_losses


model_type ='vgg19' # Default value for testing
style_weight = 1000 # Default value for testing
content_weight = 100 # Default value for testing
normalize_gradients = 'False' # Default value for testing
content_layers = ['relu4_2'] # Default value for testing
style_layers = ['relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1'] # Default value for testing
max_iter = 1000 # Default value for testing


# Figure out what layer setup to use:
cnn = None
Layer_List = []
if model_type == 'vgg19':
   cnn = models.vgg19(pretrained=True).features
   Layer_List = VGG19_Layer_List
elif model_type == 'vgg16':
   cnn = models.vgg16(pretrained=True).features
   # Lower-case "layer" is the spelling the VGG16 table is defined under.
   Layer_List = VGG16_layer_List
else:
   # Fail here with a clear message instead of later inside ModelSetup.
   raise ValueError("Unsupported model_type: " + str(model_type))


# Build the style transfer model:
net, style_losses, content_losses = ModelSetup(cnn, style_weight, content_weight, Layer_List, content_layers, style_layers, normalize_gradients)
if use_cuda:
  net = net.cuda()

img = content_image.clone()
img = nn.Parameter(img.data,requires_grad=True)
content_image = nn.Parameter(content_image.data,requires_grad=True)
style_image = nn.Parameter(style_image.data,requires_grad=True)


# Capture content targets
print("Capturing content targets")
for mod in content_losses:
  mod.mode = 'capture'
net(content_image)

# Capture style targets
for mod in content_losses:
  mod.mode = None
for mod in style_losses:
  mod.mode = 'capture'
  #mod.blend_weight = style_blend_weights[i]
net(style_image)

# Set all loss modules to loss mode
for mod in content_losses:
  mod.mode = 'loss'
for mod in style_losses:
  mod.mode = 'loss'


def maybe_print(t, loss):
    """Print the per-module and total loss when call ``t`` is a reporting step.

    A report is printed only when ``params.print_iter`` is positive and
    divides ``t``.
    """
    due = params.print_iter > 0 and t % params.print_iter == 0
    if not due:
        return
    print("Iteration: " + str(t) + " / "+ str(params.num_iterations))
    for prefix, modules in (("  Content: ", content_losses), ("  Style: ", style_losses)):
        for module in modules:
            print(prefix + str(module) + " loss: "+ str(module.loss))
    print("  Total loss " + str(loss))

def maybe_save(t):
  """Save the current image on every ``params.save_iter``-th call and on the last one.

  Intermediate files get ``_<t>`` inserted before the extension of
  ``params.output_image``; the final call (``t == params.num_iterations``)
  writes to ``params.output_image`` itself.
  """
  is_final = t == params.num_iterations
  on_schedule = params.save_iter > 0 and t % params.save_iter == 0
  if not (on_schedule or is_final):
      return
  stem, extension = os.path.splitext(params.output_image)
  if is_final:
      filename = stem + str(extension)
  else:
      filename = str(stem) + "_" + str(t) + str(extension)
  SaveImage(img.data, filename)

# Optimizer options, keyed by the constructor argument they configure.
optim_state = None
if params.optimizer == 'lbfgs':
  optim_state = {
    "max_iter": params.num_iterations,
    # Negative tolerances disable the optimizer's early-stopping checks.
    "tolerance_change": -1,
    "tolerance_grad": -1,
  }
elif params.optimizer == 'adam':
  optim_state = {
    "lr": 1,
  }

optimizer = None
# Run optimization.
if params.optimizer == 'lbfgs':
  print("Running optimization with L-BFGS")
  optimizer = optim.LBFGS([img], **optim_state)
elif params.optimizer == 'adam':
  print("Running optimization with ADAM")
  # Built once; the options are expanded as keyword arguments.
  optimizer = optim.Adam([img], **optim_state)

# Number of closure evaluations so far (a list so feval can mutate it).
num_calls = [0]


def feval():
  """Closure for ``optimizer.step``: evaluate the total loss and its gradient.

  Clamps the image to [0, 1], runs the network so that every loss module
  refreshes its ``loss``, sums those losses and backpropagates the sum so
  that ``img.grad`` is populated for the optimizer.
  """
  num_calls[0] += 1
  img.data.clamp_(0, 1)
  optimizer.zero_grad()
  net(img)
  loss = 0
  for mod in content_losses:
    loss = loss + mod.loss
  for mod in style_losses:
    loss = loss + mod.loss
  # The optimizer reads img.grad, so the gradient must be computed here.
  loss.backward()
  maybe_print(num_calls[0], loss)
  maybe_save(num_calls[0])
  return loss


# A single L-BFGS step may evaluate the closure many times, so the budget is
# counted in closure evaluations rather than in calls to step().
while num_calls[0] < params.num_iterations:
  optimizer.step(feval)
	import torch
	import torch.nn as nn
	import torch.legacy.nn as lnn
	import torchvision
	from torch.autograd import Variable

	# Define an nn Module to compute content loss in-place
	class ContentLoss(nn.Module):
	    """Pass-through layer that records a content target and measures loss.

	    ``forward`` always returns its input unchanged; ``self.mode`` selects the
	    side effect:

	    * ``'capture'``: store a detached copy of the input in ``self.target``.
	    * ``'loss'``: set ``self.loss = strength * MSE(input, self.target)``.
	    * anything else: no side effect.

	    Args:
	        strength: Weight applied to the loss and to the manual gradient.
	        normalize: ``True`` or the string ``'True'`` (any case) enables L1
	            normalization of the gradient computed by ``backward``.
	    """

	    def __init__(self, strength, normalize):
	        super(ContentLoss, self).__init__()
	        self.strength = strength
	        self.target = torch.Tensor()
	        self.gradInput = torch.Tensor()
	        self.normalize = normalize
	        self.loss = 0
	        self.crit = nn.MSELoss()
	        self.mode = None

	    def _normalize_enabled(self):
	        """Return True when gradient normalization was requested."""
	        return str(self.normalize).lower() == 'true'

	    def forward(self, input):
	        """Update target or loss according to ``self.mode``; return ``input``."""
	        if self.mode == 'loss':
	            # type_as moves the target to the input's device/dtype, so nothing
	            # here depends on CUDA being available.
	            target = self.target.type_as(input)
	            self.loss = self.crit(input, target) * self.strength
	        elif self.mode == 'capture':
	            self.target = input.detach().clone()
	        self.output = input
	        return self.output

	    def backward(self, input, gradOutput):
	        """Manual backward pass that does not rely on autograd.

	        In ``'loss'`` mode the result is
	        ``strength * d MSE(input, target) / d input + gradOutput`` (the MSE
	        gradient is L1-normalized first when normalization is enabled). If
	        ``input`` and the target differ in element count the loss term is
	        skipped. In every other mode ``gradOutput`` is passed through and the
	        captured target is left untouched.

	        Returns:
	            The tensor also stored in ``self.gradInput``.
	        """
	        upstream = gradOutput.detach()
	        count = input.nelement()
	        if self.mode == 'loss' and count > 0 and count == self.target.nelement():
	            features = input.detach()
	            target = self.target.type_as(features).view_as(features)
	            # d/dx mean((x - t)^2) = 2 * (x - t) / N
	            grad = (features - target) * (2.0 / count)
	            if self._normalize_enabled():
	                grad = grad / (grad.abs().sum() + 1e-8)
	            self.gradInput = grad * self.strength + upstream.type_as(grad).view_as(grad)
	        else:
	            self.gradInput = upstream.clone()
	        return self.gradInput


	class GramMatrix(nn.Module):
	    """Compute the unnormalized Gram matrix ``X @ X.T`` of a feature map.

	    ``X`` is the input viewed as ``(rows, H * W)`` where ``H`` and ``W`` are
	    the last two dimensions and ``rows`` is the product of the leading ones.
	    Both ``(C, H, W)`` and ``(B, C, H, W)`` inputs are accepted.
	    """

	    def __init__(self):
	        super(GramMatrix, self).__init__()
	        self.output = torch.Tensor()
	        self.gradInput = torch.Tensor()

	    @staticmethod
	    def _flatten(input):
	        """View ``input`` as a 2-D matrix with one row per feature map."""
	        if input.dim() not in (3, 4):
	            raise ValueError(
	                "GramMatrix expects a 3-D or 4-D tensor, got " + str(input.dim()) + "-D")
	        return input.contiguous().view(-1, input.size(-2) * input.size(-1))

	    def forward(self, input):
	        """Return the Gram matrix of ``input`` (also stored in ``self.output``)."""
	        x_flat = self._flatten(input)
	        self.output = torch.mm(x_flat, x_flat.t())
	        return self.output

	    def backward(self, input, gradOutput):
	        """Return the gradient w.r.t. ``input`` given the gradient w.r.t. the Gram matrix.

	        Since ``G = X @ X.T``, the gradient is
	        ``gradOutput @ X + gradOutput.T @ X``, reshaped like ``input``.
	        The result is also stored in ``self.gradInput``.
	        """
	        x_flat = self._flatten(input)
	        grad = torch.mm(gradOutput + gradOutput.t(), x_flat)
	        self.gradInput = grad.view_as(input)
	        return self.gradInput


	# Define an nn Module to compute style loss in-place
	class StyleLoss(nn.Module):
	    """Pass-through layer that records a style target and measures loss.

	    The style statistic is the Gram matrix produced by ``self.gram`` divided
	    by the number of elements of the input. ``self.mode`` selects the side
	    effect of ``forward``:

	    * ``'capture'``: store the statistic in ``self.target``. With
	      ``blend_weight`` set, successive captures accumulate
	      ``blend_weight * statistic``.
	    * ``'loss'``: set ``self.loss = strength * MSE(statistic, self.target)``.
	    * anything else: no side effect.

	    Args:
	        strength: Weight applied to the loss and to the manual gradient.
	        normalize: ``True`` or the string ``'True'`` (any case) enables L1
	            normalization of the gradient computed by ``backward``.
	    """

	    def __init__(self, strength, normalize):
	        super(StyleLoss, self).__init__()
	        self.normalize = normalize
	        self.strength = strength
	        self.target = torch.Tensor()
	        self.gradInput = torch.Tensor()
	        self.mode = None
	        self.loss = 0
	        self.gram = GramMatrix()
	        self.blend_weight = None
	        self.G = None
	        self.crit = nn.MSELoss()

	    def _normalize_enabled(self):
	        """Return True when gradient normalization was requested."""
	        return str(self.normalize).lower() == 'true'

	    def _statistic(self, features):
	        """Return the Gram matrix of ``features`` divided by its element count."""
	        return self.gram(features) / features.nelement()

	    def forward(self, input):
	        """Update target or loss according to ``self.mode``; return a copy of ``input``."""
	        # Computed from ``input`` itself (not ``input.data``) so that the loss
	        # stays connected to the autograd graph.
	        self.G = self._statistic(input)
	        if self.mode == 'capture':
	            captured = self.G.detach()
	            if self.blend_weight is None:
	                self.target = captured.clone()
	            elif self.target.nelement() == 0:
	                self.target = captured * self.blend_weight
	            else:
	                self.target = self.target.type_as(captured) + captured * self.blend_weight
	        elif self.mode == 'loss':
	            # type_as moves the target to the statistic's device/dtype.
	            self.loss = self.strength * self.crit(self.G, self.target.type_as(self.G))
	        self.output = input.clone()
	        return self.output

	    def backward(self, input, gradOutput):
	        """Manual backward pass.

	        In ``'loss'`` mode the result is
	        ``strength * d MSE(statistic(input), target) / d input + gradOutput``
	        (the MSE gradient is L1-normalized first when normalization is
	        enabled). The derivative is obtained by differentiating through
	        ``self.gram`` on a detached copy of ``input``, so no gradient is
	        accumulated on any existing tensor. In every other mode ``gradOutput``
	        is passed through.

	        Returns:
	            The tensor also stored in ``self.gradInput``.
	        """
	        if self.mode != 'loss':
	            self.gradInput = gradOutput
	            return self.gradInput
	        with torch.enable_grad():
	            features = input.detach().clone().requires_grad_(True)
	            statistic = self._statistic(features)
	            raw_loss = self.crit(statistic, self.target.type_as(statistic))
	            grad = torch.autograd.grad(raw_loss, features)[0]
	        if self._normalize_enabled():
	            grad = grad / (grad.abs().sum() + 1e-8)
	        self.gradInput = grad * self.strength + gradOutput.detach().type_as(grad)
	        return self.gradInput
	import torch
	import torch.nn as nn
	#import torch.legacy.nn as lnn
	import torchvision
	from torch.autograd import Variable


	class ContentLoss(nn.Module):
	    """Pass-through content-loss layer whose gradient comes from autograd.

	    ``forward`` returns its input unchanged; ``self.mode`` selects the side
	    effect:

	    * ``'capture'``: store a detached copy of the input in ``self.target``.
	    * ``'loss'``: set ``self.loss = strength * MSE(input, self.target)``.
	    * anything else: no side effect.

	    Args:
	        strength: Weight applied to the loss.
	        normalize: ``True`` or the string ``'True'`` (any case) makes
	            ``backward`` L1-normalize this layer's gradient.
	    """

	    def __init__(self, strength, normalize):
	        super(ContentLoss, self).__init__()
	        # The target is stored detached: it is a fixed value, not something
	        # gradients should flow into.
	        self.target = torch.Tensor()
	        self.strength = strength
	        self.criterion = nn.MSELoss()
	        self.mode = None
	        self.normalize = normalize
	        self.loss = 0

	    def _normalize_enabled(self):
	        """Return True when gradient normalization was requested."""
	        return str(self.normalize).lower() == 'true'

	    def forward(self, input):
	        """Update target or loss according to ``self.mode``; return ``input``."""
	        if self.mode == 'loss':
	            # type_as moves the target to the input's device/dtype.
	            self.loss = self.criterion(input, self.target.type_as(input)) * self.strength
	        elif self.mode == 'capture':
	            self.target = input.detach().clone()
	        self.output = input
	        return self.output

	    def backward(self, input, gradOutput, retain_graph=True):
	        """Backpropagate the loss stored by the last ``forward`` exactly once.

	        ``input`` and ``gradOutput`` are accepted for interface compatibility
	        and are not used. Outside ``'loss'`` mode nothing is backpropagated
	        and the captured target is left untouched.

	        With normalization enabled, the gradient of the loss with respect to
	        the tensor seen by ``forward`` is divided by its L1 norm, scaled by
	        ``strength`` and then propagated further back from that tensor.

	        Returns:
	            ``self.loss``.
	        """
	        if self.mode != 'loss':
	            return self.loss
	        if self._normalize_enabled():
	            # self.output is the very tensor the loss was computed from.
	            grad = torch.autograd.grad(self.loss, self.output, retain_graph=True)[0]
	            grad = grad / (grad.abs().sum() + 1e-8) * self.strength
	            self.output.backward(grad, retain_graph=retain_graph)
	        else:
	            self.loss.backward(retain_graph=retain_graph)
	        return self.loss

	class GramMatrix(nn.Module):
	    """Gram matrix of a 4-D feature map, divided by its element count."""

	    def forward(self, input):
	        """Return ``F @ F.T / (a * b * c * d)`` for an ``(a, b, c, d)`` input.

	        ``F`` is the input viewed as ``(a * b, c * d)``: one row per feature
	        map, one column per spatial position.
	        """
	        a, b, c, d = input.size()
	        features = input.view(a * b, c * d)
	        G = torch.mm(features, features.t())
	        # Normalize by the total number of elements in the feature maps.
	        return G.div(a * b * c * d)

	    def backward(self, input, gradOutput):
	        """Return the gradient of ``forward`` w.r.t. the flattened features.

	        For ``G = F @ F.T / n`` the gradient is
	        ``(gradOutput + gradOutput.T) @ F / n``. The result keeps the
	        flattened ``(a * b, c * d)`` shape.
	        """
	        a, b, c, d = input.size()
	        features = input.view(a * b, c * d)
	        grad = torch.mm(gradOutput + gradOutput.t(), features)
	        return grad.div(a * b * c * d)



	class StyleLoss(nn.Module):
	    """Pass-through style-loss layer whose gradient comes from autograd.

	    The style statistic is whatever ``self.gram`` returns for the input.
	    ``self.mode`` selects the side effect of ``forward``:

	    * ``'capture'``: store the statistic in ``self.target``. With
	      ``blend_weight`` set, successive captures accumulate
	      ``blend_weight * statistic``.
	    * ``'loss'``: set ``self.loss = strength * MSE(statistic, self.target)``.
	    * anything else: no side effect.

	    Args:
	        strength: Weight applied to the loss.
	        normalize: ``True`` or the string ``'True'`` (any case) makes
	            ``backward`` L1-normalize this layer's gradient.
	    """

	    def __init__(self, strength, normalize):
	        super(StyleLoss, self).__init__()
	        self.target = torch.Tensor()
	        self.strength = strength
	        self.gram = GramMatrix()
	        self.criterion = nn.MSELoss()
	        self.mode = None
	        self.blend_weight = None
	        self.G = None
	        self.normalize = normalize
	        self.loss = 0
	        # Tensor the current loss was computed from; needed by backward().
	        self._loss_input = None

	    def _normalize_enabled(self):
	        """Return True when gradient normalization was requested."""
	        return str(self.normalize).lower() == 'true'

	    def forward(self, input):
	        """Update target or loss according to ``self.mode``; return a copy of ``input``."""
	        self.output = input.clone()
	        self.G = self.gram(input)
	        if self.mode == 'capture':
	            captured = self.G.detach()
	            if self.blend_weight is None:
	                self.target = captured.clone()
	            elif self.target.nelement() == 0:
	                self.target = captured * self.blend_weight
	            else:
	                self.target = self.target.type_as(captured) + captured * self.blend_weight
	        elif self.mode == 'loss':
	            self._loss_input = input
	            # type_as moves the target to the statistic's device/dtype.
	            self.loss = self.strength * self.criterion(self.G, self.target.type_as(self.G))
	        return self.output

	    def backward(self, input, gradOutput, retain_graph=True):
	        """Backpropagate the loss stored by the last ``forward`` exactly once.

	        ``input`` is accepted for interface compatibility and is not used.
	        Outside ``'loss'`` mode nothing is backpropagated and ``gradOutput``
	        is returned unchanged, without overwriting ``self.loss``.

	        With normalization enabled, the gradient of the loss with respect to
	        the tensor seen by ``forward`` is divided by its L1 norm, scaled by
	        ``strength`` and then propagated further back from that tensor.

	        Returns:
	            ``self.loss`` in ``'loss'`` mode, otherwise ``gradOutput``.
	        """
	        if self.mode != 'loss':
	            return gradOutput
	        if self._normalize_enabled():
	            grad = torch.autograd.grad(self.loss, self._loss_input, retain_graph=True)[0]
	            grad = grad / (grad.abs().sum() + 1e-8) * self.strength
	            self._loss_input.backward(grad, retain_graph=retain_graph)
	        else:
	            self.loss.backward(retain_graph=retain_graph)
	        return self.loss