@ProGamerGov
Last active July 5, 2018 01:27
# CVGG.py
import torch
import torch.nn as nn


class VGG(nn.Module):
    def __init__(self, features, num_classes=1000):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
class NIN(nn.Module):
    def __init__(self, pooling):
        super(NIN, self).__init__()
        pool2d = None
        if pooling == 'max':
            pool2d = nn.MaxPool2d((3, 3), (2, 2), (0, 0), ceil_mode=True)
        elif pooling == 'avg':
            pool2d = nn.AvgPool2d((3, 3), (2, 2), (0, 0), ceil_mode=True)
        # The same pooling module instance is reused for every pooling stage
        # below; pooling layers are stateless, so this is safe.
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, (11, 11), (4, 4)),
            nn.ReLU(inplace=True),
            nn.Conv2d(96, 96, (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(96, 96, (1, 1)),
            nn.ReLU(inplace=True),
            pool2d,
            nn.Conv2d(96, 256, (5, 5), (1, 1), (2, 2)),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, (1, 1)),
            nn.ReLU(inplace=True),
            pool2d,
            nn.Conv2d(256, 384, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, (1, 1)),
            nn.ReLU(inplace=True),
            pool2d,
            nn.Dropout(0.5),
            nn.Conv2d(384, 1024, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(1024, 1024, (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(1024, 1000, (1, 1)),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((6, 6), (1, 1), (0, 0), ceil_mode=True),
            nn.Softmax(),
        )
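
# Shape sketch (not part of the original gist; the 224x224 ImageNet input
# size is an assumption): tracing a dummy batch through the feature stack
# ends in a 1x1 spatial map over the 1000 classes.
#   model = NIN('max')
#   out = model.features(torch.randn(1, 3, 224, 224))
#   out.size()  # torch.Size([1, 1000, 1, 1])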
def BuildSequential(channel_list, pooling):
    layers = []
    in_channels = 3
    if pooling == 'max':
        pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
    elif pooling == 'avg':
        pool2d = nn.AvgPool2d(kernel_size=2, stride=2)
    else:
        raise ValueError("Unrecognized pooling parameter: " + str(pooling))
    for c in channel_list:
        if c == 'P':
            layers += [pool2d]
        else:
            conv2d = nn.Conv2d(in_channels, c, kernel_size=3, padding=1)
            layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = c
    return nn.Sequential(*layers)
channel_list = {
    'D': [64, 64, 'P', 128, 128, 'P', 256, 256, 256, 'P', 512, 512, 512, 'P', 512, 512, 512, 'P'],
    'E': [64, 64, 'P', 128, 128, 'P', 256, 256, 256, 256, 'P', 512, 512, 512, 512, 'P', 512, 512, 512, 512, 'P'],
}

nin_dict = {
    'C': ['conv1', 'cccp1', 'cccp2', 'conv2', 'cccp3', 'cccp4', 'conv3', 'cccp5', 'cccp6', 'conv4-1024', 'cccp7-1024', 'cccp8-1024'],
    'R': ['relu0', 'relu1', 'relu2', 'relu3', 'relu5', 'relu6', 'relu7', 'relu8', 'relu9', 'relu10', 'relu11', 'relu12'],
    'P': ['pool1', 'pool2', 'pool3', 'pool4'],
    'D': ['drop'],
}

vgg16_dict = {
    'C': ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2', 'conv3_3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv5_1', 'conv5_2', 'conv5_3'],
    'R': ['relu1_1', 'relu1_2', 'relu2_1', 'relu2_2', 'relu3_1', 'relu3_2', 'relu3_3', 'relu4_1', 'relu4_2', 'relu4_3', 'relu5_1', 'relu5_2', 'relu5_3'],
    'P': ['pool1', 'pool2', 'pool3', 'pool4', 'pool5'],
}

vgg19_dict = {
    'C': ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2', 'conv3_3', 'conv3_4', 'conv4_1', 'conv4_2', 'conv4_3', 'conv4_4', 'conv5_1', 'conv5_2', 'conv5_3', 'conv5_4'],
    'R': ['relu1_1', 'relu1_2', 'relu2_1', 'relu2_2', 'relu3_1', 'relu3_2', 'relu3_3', 'relu3_4', 'relu4_1', 'relu4_2', 'relu4_3', 'relu4_4', 'relu5_1', 'relu5_2', 'relu5_3', 'relu5_4'],
    'P': ['pool1', 'pool2', 'pool3', 'pool4', 'pool5'],
}


def vgg19(pooling, **kwargs):
    # VGG 19-layer model (configuration "E")
    model = VGG(BuildSequential(channel_list['E'], pooling), **kwargs)
    return model, vgg19_dict


def vgg16(pooling, **kwargs):
    # VGG 16-layer model (configuration "D")
    model = VGG(BuildSequential(channel_list['D'], pooling), **kwargs)
    return model, vgg16_dict


def nin(pooling, **kwargs):
    # Network In Network model
    model = NIN(pooling)
    return model, nin_dict
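
# Usage sketch (not part of the original gist): running this file directly
# builds a VGG-19 with max pooling and prints the layer names that the
# -content_layers / -style_layers options below refer to.
if __name__ == "__main__":
    model, layer_names = vgg19('max')
    print(model.features)
    print(layer_names['R'])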
# download_models.py
# Fixes from here: https://github.com/jcjohnson/pytorch-vgg/issues/3
# Usage: python download_models.py or python3 download_models.py
import torch
from collections import OrderedDict
from torch.utils.model_zoo import load_url

try:  # Python 3
    from urllib.request import urlretrieve
except ImportError:  # Python 2
    from urllib import urlretrieve

# Note: the target directory models/ is assumed to already exist.
# Download the VGG-19 model and fix the layer names:
sd = load_url("https://s3-us-west-2.amazonaws.com/jcjohns-models/vgg19-d01eb7cb.pth")
map = {'classifier.1.weight': u'classifier.0.weight', 'classifier.1.bias': u'classifier.0.bias', 'classifier.4.weight': u'classifier.3.weight', 'classifier.4.bias': u'classifier.3.bias'}
sd = OrderedDict([(map[k] if k in map else k, v) for k, v in sd.items()])  # items() works on Python 2 and 3
torch.save(sd, "models/vgg19-d01eb7cb.pth")

# Download the VGG-16 model and fix the layer names:
sd = load_url("https://s3-us-west-2.amazonaws.com/jcjohns-models/vgg16-00b39a1b.pth")
map = {'classifier.1.weight': u'classifier.0.weight', 'classifier.1.bias': u'classifier.0.bias', 'classifier.4.weight': u'classifier.3.weight', 'classifier.4.bias': u'classifier.3.bias'}
sd = OrderedDict([(map[k] if k in map else k, v) for k, v in sd.items()])
torch.save(sd, "models/vgg16-00b39a1b.pth")

# Download the NIN model:
urlretrieve("https://raw.githubusercontent.com/ProGamerGov/pytorch-nin/master/nin_imagenet.pth", "models/nin_imagenet.pth")
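
# Sanity check (a sketch, not in the original gist): after the renaming
# above, the converted weights should load cleanly into the matching
# definition from CVGG.py.
from CVGG import vgg19
model, _ = vgg19('max')
model.load_state_dict(torch.load("models/vgg19-d01eb7cb.pth"))
print("VGG-19 state dict loads OK")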
# LossModules.py
import torch
import torch.nn as nn


# Define an nn Module to compute content loss in-place
class ContentLoss(nn.Module):
    def __init__(self, strength):
        super(ContentLoss, self).__init__()
        self.target = torch.Tensor().float()
        self.strength = strength
        self.crit = nn.MSELoss()
        self.mode = 'None'

    def forward(self, input):
        if self.mode == 'capture':
            self.target = input.detach()
        elif self.mode == 'loss':
            self.loss = self.crit(input, self.target) * self.strength
        self.output = input
        return self.output

    def backward(self, retain_graph=True):
        self.loss.backward(retain_graph=retain_graph)
        return self.loss
class GramMatrix(nn.Module):
    def forward(self, input):
        # Assumes a single-image batch: flatten each channel and take all
        # pairwise channel inner products, giving a C x C matrix.
        B, C, H, W = input.size()
        x_flat = input.view(C, H * W)
        self.output = torch.mm(x_flat, x_flat.t())
        return self.output
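
# Worked example (not in the original gist): for a 1x2x2x2 input whose two
# channels flatten to [1, 2, 3, 4] and [0, 1, 0, 1], the Gram matrix holds
# their pairwise inner products:
#   x = torch.Tensor([[[[1., 2.], [3., 4.]],
#                      [[0., 1.], [0., 1.]]]])
#   GramMatrix()(x)  # [[30., 6.], [6., 2.]]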
# Define an nn Module to compute style loss in-place
class StyleLoss(nn.Module):
    def __init__(self, strength):
        super(StyleLoss, self).__init__()
        self.target = torch.Tensor()
        self.strength = strength
        self.gram = GramMatrix()
        self.crit = nn.MSELoss()
        self.mode = 'None'
        self.blend_weight = None

    def forward(self, input):
        self.output = input.clone()
        self.G = self.gram(input)
        self.G.div_(input.nelement())
        if self.mode == 'capture':
            if self.blend_weight is None:
                self.target = self.G.detach()
            elif self.target.nelement() == 0:
                self.target = self.G.detach().mul(self.blend_weight)
            else:
                self.target = self.target.add(self.blend_weight, self.G.detach())
        elif self.mode == 'loss':
            self.loss = self.strength * self.crit(self.G, self.target)
        return self.output

    def backward(self, retain_graph=True):
        self.loss.backward(retain_graph=retain_graph)
        return self.loss
class TVLoss(nn.Module):
    def __init__(self, strength):
        super(TVLoss, self).__init__()
        self.strength = strength
        self.x_diff = torch.Tensor()
        self.y_diff = torch.Tensor()

    def forward(self, input):
        self.output = input.clone()
        self.x_diff = input[:, :, 1:, :] - input[:, :, :-1, :]
        self.y_diff = input[:, :, :, 1:] - input[:, :, :, :-1]
        self.loss = self.strength * (torch.sum(torch.abs(self.x_diff)) + torch.sum(torch.abs(self.y_diff)))
        return self.output

    def backward(self, retain_graph=True):
        self.loss.backward(retain_graph=retain_graph)
        return self.loss
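
# Worked example (not in the original gist): for a 1x1x2x2 image
# [[1, 2], [3, 4]], the anisotropic TV term is |3-1| + |4-2| (vertical)
# plus |2-1| + |4-3| (horizontal) = 6, scaled by strength:
#   tv = TVLoss(strength=1.0)
#   tv(torch.Tensor([[[[1., 2.], [3., 4.]]]]))
#   tv.loss  # 6.0

# Main style-transfer script: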
import os
import copy
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from PIL import Image
from torch.autograd import Variable
from CVGG import vgg19, vgg16, nin
from LossModules import ContentLoss, StyleLoss, TVLoss
import argparse
parser = argparse.ArgumentParser()
# Basic options
parser.add_argument("-style_image", help="Style target image", default='examples/inputs/seated-nude.jpg')
parser.add_argument("-style_blend_weights", default=None)
parser.add_argument("-content_image", help="Content target image", default='examples/inputs/tubingen.jpg')
parser.add_argument("-image_size", help="Maximum height / width of generated image", type=int, default=512)
parser.add_argument("-gpu", help="Zero-indexed ID of the GPU to use; for CPU mode set -gpu = -1", default='0')
# Optimization options
parser.add_argument("-content_weight", type=float, default=5e0)
parser.add_argument("-style_weight", type=float, default=1e2)
parser.add_argument("-tv_weight", type=float, default=0.001)
parser.add_argument("-num_iterations", type=int, default=1000)
parser.add_argument("-init", default="random", choices=["random", "image"])
parser.add_argument("-init_image", default=None)
parser.add_argument("-optimizer", default="lbfgs", choices=["lbfgs", "adam"])
parser.add_argument("-learning_rate", type=float, default=1e0)
parser.add_argument("-lbfgs_num_correction", type=int, default='0')
# Output options
parser.add_argument("-print_iter", type=int, default=50)
parser.add_argument("-save_iter", type=int, default=100)
parser.add_argument("-output_image", default='out.png')
# Other options
parser.add_argument("-style_scale", type=float, default=1.0)
parser.add_argument("-pooling", help="max or avg pooling", type=str, default='max')
parser.add_argument("-model_file", type=str, default='models/vgg19-d01eb7cb.pth')
parser.add_argument("-backend", type=str, default='cudnn')
parser.add_argument("-cudnn_autotune", action='store_true')
parser.add_argument("-seed", type=int, default=-1)
parser.add_argument("-content_layers", help="layers for content", default='relu4_2')
parser.add_argument("-style_layers", help="layers for style", default='relu1_1,relu2_1,relu3_1,relu4_1,relu5_1')
params = parser.parse_args()
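
# Example invocation (a sketch; the script filename is an assumption, since
# the gist does not give one):
#   python neural_style.py -content_image examples/inputs/tubingen.jpg \
#       -style_image examples/inputs/seated-nude.jpg \
#       -model_file models/vgg19-d01eb7cb.pth -gpu 0 -backend cudnn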
Image.MAX_IMAGE_PIXELS = 1000000000  # Support gigapixel images

dtype = None
if params.gpu == '0':
    if params.backend == 'cudnn':
        torch.backends.cudnn.enabled = True
        if params.cudnn_autotune:
            torch.backends.cudnn.benchmark = True
    else:
        torch.backends.cudnn.enabled = False
    dtype = torch.cuda.FloatTensor
elif params.gpu == '-1':
    if params.backend == 'mkl':
        torch.backends.mkl.enabled = True
    dtype = torch.FloatTensor

# Optionally set the seed value
if params.seed >= 0:
    torch.manual_seed(params.seed)
    torch.cuda.manual_seed(params.seed)
    torch.backends.cudnn.deterministic = True
# Preprocess an image before passing it to a model.
# We need to rescale from [0, 1] to [0, 255], convert from RGB to BGR,
# and subtract the mean pixel.
def ImageSetup(image_name, image_size):
    image = Image.open(image_name).convert('RGB')
    if type(image_size) is not tuple:
        # Scale (height, width) so the longest side equals image_size
        image_size = tuple([int((float(image_size) / max(image.size)) * x) for x in (image.height, image.width)])
    Loader = transforms.Compose([transforms.Resize(image_size), transforms.ToTensor()])  # resize and convert to tensor
    rgb2bgr = transforms.Compose([transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])])])
    Normalize = transforms.Compose([transforms.Normalize(mean=[103.939, 116.779, 123.68], std=[1, 1, 1])])  # Subtract the BGR mean pixel
    tensor = Variable(Normalize(rgb2bgr(Loader(image) * 256))).unsqueeze(0)
    return tensor
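
# Usage sketch (not in the original gist): load an image scaled so its longest
# side is 512 pixels, as a 1x3xHxW BGR tensor with the mean pixel subtracted.
#   content = ImageSetup('examples/inputs/tubingen.jpg', 512)
#   content.size()  # e.g. torch.Size([1, 3, 384, 512])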
# Undo the above preprocessing and save the tensor as an image:
def SaveImage(output_tensor, output_name):
    Normalize = transforms.Compose([transforms.Normalize(mean=[-103.939, -116.779, -123.68], std=[1, 1, 1])])  # Add the BGR mean pixel back
    bgr2rgb = transforms.Compose([transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])])])
    output_tensor = bgr2rgb(Normalize(output_tensor.squeeze(0).cpu().data)) / 256
    output_tensor.clamp_(0, 1)
    Image2PIL = transforms.ToPILImage()
    image = Image2PIL(output_tensor.cpu())
    image.save(str(output_name))
def maybe_save(t):
    should_save = params.save_iter > 0 and t % params.save_iter == 0
    should_save = should_save or t == params.num_iterations
    if should_save:
        output_filename, file_extension = os.path.splitext(params.output_image)
        if t == params.num_iterations:
            filename = output_filename + str(file_extension)
        else:
            filename = str(output_filename) + "_" + str(t) + str(file_extension)
        SaveImage(img.clone(), filename)


def maybe_print(t, contentLossList, styleLossList):
    if params.print_iter > 0 and t % params.print_iter == 0:
        print("Iteration: " + str(t) + " / " + str(params.num_iterations))
        totalLoss, c, s = 0, 0, 0
        for i in content_losses:
            print("  Content " + str(c + 1) + " loss: " + str(contentLossList[c]))
            totalLoss += contentLossList[c]
            c = c + 1
        for i in style_losses:
            print("  Style " + str(s + 1) + " loss: " + str(styleLossList[s]))
            totalLoss += styleLossList[s]
            s = s + 1
        print("  Total loss: " + str(totalLoss))
content_image = ImageSetup(params.content_image, params.image_size).type(dtype)

style_image_list = params.style_image.split(',')
style_images_caffe = []
for image in style_image_list:
    image_size = int(params.image_size * params.style_scale)
    img_caffe = ImageSetup(image, image_size).type(dtype)
    style_images_caffe.append(img_caffe)

init_image = None
if params.init_image is not None:
    image_size = (content_image.size(2), content_image.size(3))
    init_image = ImageSetup(params.init_image, image_size).type(dtype)

style_blend_weights = []
if params.style_blend_weights is None:
    # Style blending not specified, so use equal weighting
    for i in style_image_list:
        style_blend_weights.append(1.0)
    i = 0
    for blend_weights in style_blend_weights:
        style_blend_weights[i] = int(style_blend_weights[i])
        i += 1
else:
    style_blend_weights = params.style_blend_weights.split(',')
    # Normalize the style blending weights so they sum to 1
    style_blend_sum = 0
    i = 0
    for blend_weights in style_blend_weights:
        style_blend_weights[i] = float(style_blend_weights[i])
        style_blend_sum = float(style_blend_sum) + style_blend_weights[i]
        i += 1
    i = 0
    for blend_weights in style_blend_weights:
        style_blend_weights[i] = float(style_blend_weights[i]) / float(style_blend_sum)
        i += 1
# Initialize the image
if params.init == 'random':
    B, C, H, W = content_image.data.size()
    img = Variable(torch.randn(C, H, W)).unsqueeze(0).type(dtype)
elif params.init == 'image':
    if params.init_image is not None:
        img = init_image.clone()
    else:
        img = content_image.clone()
    img = img.type(dtype)

content_layers = params.content_layers.split(',')
style_layers = params.style_layers.split(',')
# Get the model class, and configure the pooling layer type
def buildCNN(model_file, pooling):
    cnn, layerList = None, None
    if "vgg19" in str(model_file):
        print("VGG-19 Architecture Detected")
        cnn, layerList = vgg19(pooling)
    elif "vgg16" in str(model_file):
        print("VGG-16 Architecture Detected")
        cnn, layerList = vgg16(pooling)
    elif "nin" in str(model_file):
        print("NIN Architecture Detected")
        cnn, layerList = nin(pooling)
    return cnn, layerList
# Set up the network, inserting style and content loss modules
def modelSetup(cnn, layerList):
    cnn = copy.deepcopy(cnn)
    content_losses, style_losses, tv_losses = [], [], []
    net = nn.Sequential()
    i, c, r = 1, 0, 0
    if params.tv_weight > 0:
        tv_mod = TVLoss(params.tv_weight).type(dtype)
        net.add_module(str(len(net)), tv_mod)
        tv_losses.append(tv_mod)
    for layer in list(cnn):
        if isinstance(layer, nn.Conv2d):
            net.add_module(str(len(net)), layer)
            layerType = layerList['C']
            if layerType[c] in content_layers:
                print("Setting up content layer " + str(i) + ": " + str(layerType[c]))
                loss_module = ContentLoss(params.content_weight)
                net.add_module(str(len(net)), loss_module)
                content_losses.append(loss_module)
            if layerType[c] in style_layers:
                print("Setting up style layer " + str(i) + ": " + str(layerType[c]))
                loss_module = StyleLoss(params.style_weight)
                net.add_module(str(len(net)), loss_module)
                style_losses.append(loss_module)
            c += 1
        if isinstance(layer, nn.ReLU):
            net.add_module(str(len(net)), layer)
            layerType = layerList['R']
            if layerType[r] in content_layers:
                print("Setting up content layer " + str(i) + ": " + str(layerType[r]))
                loss_module = ContentLoss(params.content_weight)
                net.add_module(str(len(net)), loss_module)
                content_losses.append(loss_module)
            if layerType[r] in style_layers:
                print("Setting up style layer " + str(i) + ": " + str(layerType[r]))
                loss_module = StyleLoss(params.style_weight)
                net.add_module(str(len(net)), loss_module)
                style_losses.append(loss_module)
            r += 1
            i += 1
        if isinstance(layer, nn.MaxPool2d) or isinstance(layer, nn.AvgPool2d):
            net.add_module(str(len(net)), layer)  # Copy pooling layers over unchanged
    return net, style_losses, content_losses, tv_losses
def captureTargets():
    # Capture content targets
    for i in content_losses:
        i.mode = 'capture'
    print("Capturing content targets")
    # print(net)
    net(content_image).clone()
    # Capture style targets
    for i in content_losses:
        i.mode = 'None'
    i = 0
    for image in style_images_caffe:
        print("Capturing style target " + str(i + 1))
        for j in style_losses:
            j.mode = 'capture'
            j.blend_weight = style_blend_weights[i]
        net(style_images_caffe[i]).clone()
        i += 1
    # Set all loss modules to loss mode
    for i in content_losses:
        i.mode = 'loss'
    for i in style_losses:
        i.mode = 'loss'
    return
# Configure the optimizer and the input image
def setupOptimizer(img):
    img = nn.Parameter(img.data)
    if params.optimizer == 'lbfgs':
        print("Running optimization with L-BFGS")
        if params.lbfgs_num_correction > 0:
            optimizer = optim.LBFGS([img], history_size=params.lbfgs_num_correction)
        else:
            optimizer = optim.LBFGS([img])
    elif params.optimizer == 'adam':
        print("Running optimization with ADAM")
        optimizer = optim.Adam([img], lr=params.learning_rate)
    return img, optimizer
# Build the model definition and set up pooling layers:
cnn, layerList = buildCNN(params.model_file, params.pooling)
cnn.load_state_dict(torch.load(params.model_file))  # Use the model definition to load the model file.

# Convert the model to CUDA now, to avoid later issues:
if params.gpu == '0':
    cnn = cnn.cuda()
# We only need the features from the model:
cnn = cnn.features

# Build the style transfer network:
net, style_losses, content_losses, tv_losses = modelSetup(cnn, layerList)
captureTargets()  # Capture content and style targets separately, to avoid size mismatches.
img, optimizer = setupOptimizer(img)  # Set up the optimizer.
# Function to evaluate loss and gradient. We run the net forward and
# backward to get the gradient, and sum up losses from the loss modules.
# optim.lbfgs internally handles iteration and calls this function many
# times, so we manually count the number of iterations to handle printing
# and saving intermediate results.
num_calls = [0]
while num_calls[0] <= params.num_iterations:
    def feval():
        num_calls[0] += 1
        optimizer.zero_grad()
        net(img)
        contentLoss, styleLoss, tvLoss = 0, 0, 0
        contentLossList, styleLossList = [], []
        for mod in content_losses:
            loss = mod.backward()
            contentLoss += loss
            contentLossList.append(loss.data[0])
        for mod in style_losses:
            loss = mod.backward()
            styleLoss += loss
            styleLossList.append(loss.data[0])
        if params.tv_weight > 0:
            for mod in tv_losses:
                tvLoss += mod.backward()
        maybe_save(num_calls[0])
        maybe_print(num_calls[0], contentLossList, styleLossList)
        return contentLoss + styleLoss + tvLoss

    optimizer.step(feval)
htoyryla commented Apr 8, 2018

I think I'm starting to see the point now. The closure needs an array because it will be run by the optimizer, in a different scope, and we need to ensure that num_calls is shared between the program and the closure. A normal variable would simply be a value, but with an array we have a reference which points to the array, and using += we are modifying the element inside that array.
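
A minimal sketch of the same idiom (names are illustrative only, not from the gist):

    count = [0]  # a one-element list acts as a shared, mutable cell

    def closure():
        count[0] += 1  # mutates the shared list element
        return count[0]

    closure()
    closure()
    print(count[0])  # 2

With a plain `count = 0`, the `count += 1` inside the closure would raise UnboundLocalError; Python 3's `nonlocal` keyword is the usual alternative to the list trick.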

ProGamerGov (Author) commented:
The code in this Gist was continued from here: https://gist.github.com/ProGamerGov/0a4624bd5c06fe72bf7995bcec03b67e

A more up to date version of this code can be found here: https://gist.github.com/ProGamerGov/089a082c2a000d1e1cc034fc75ff5931
