@Shashi456 · Created February 13, 2019 10:42
import torch
import torch.nn as nn

class GramMatrix(nn.Module):
    def forward(self, input):
        b, c, h, w = input.size()
        # Flatten each feature map: b x c x (h*w)
        f = input.view(b, c, h * w)
        # torch.bmm(batch1, batch2): batch1 b x m x p, batch2 b x p x n -> b x m x n
        # f: b x c x (h*w), f.transpose(1, 2): b x (h*w) x c -> G: b x c x c
        G = torch.bmm(f, f.transpose(1, 2))
        # Normalize by the number of spatial positions
        return G.div_(h * w)

class styleLoss(nn.Module):
    def forward(self, input, target):
        GramInput = GramMatrix()(input)
        return nn.MSELoss()(GramInput, target)
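# Quick shape sanity check for GramMatrix (the random tensor is illustrative only):
feats = torch.randn(1, 64, 8, 8)
print(GramMatrix()(feats).shape)  # torch.Size([1, 64, 64])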
styleTargets = []
for t in vgg(styleImg, style_layers):
    t = t.detach()
    styleTargets.append(GramMatrix()(t))

contentTargets = []
for t in vgg(contentImg, content_layers):
    t = t.detach()
    contentTargets.append(t)

style_Losses = [styleLoss()] * len(style_layers)
content_Losses = [nn.MSELoss()] * len(content_layers)
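# The LBFGS training loop at the bottom of this gist expects flat lists that line up
# index-for-index with vgg(optimImg, loss_layers). One plausible assembly from the
# pieces above (the wiring and the weight values are assumptions, not the gist's code):
loss_layers = style_layers + content_layers
losses = style_Losses + content_Losses
targets = styleTargets + contentTargets
style_weight, content_weight = 1000.0, 1.0  # illustrative values only
weights = [style_weight] * len(style_layers) + [content_weight] * len(content_layers)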
import tensorflow as tf

def get_content_loss(content, target):
    return tf.reduce_mean(tf.square(content - target)) / 2
### Style Loss Function
def gram_matrix(input_tensor):
    # If the input tensor is a 3-D array of size Nh x Nw x Nc,
    # reshape it to a 2-D array of (Nh*Nw) x Nc
    channels = int(input_tensor.shape[-1])
    a = tf.reshape(input_tensor, [-1, channels])
    # Gram matrix: a^T a, of size Nc x Nc
    gram = tf.matmul(a, a, transpose_a=True)
    return gram
def get_style_loss(base_style, gram_target):
    height, width, channels = base_style.get_shape().as_list()
    gram_style = gram_matrix(base_style)
    # The original equation divides by 4 * (channels**2) * ((width*height)**2);
    # here a simpler normalization by channels**2 * width * height is used
    return tf.reduce_mean(tf.square(gram_style - gram_target)) / (channels**2 * width * height)
### Pass the content and style images through the model
def get_feature_representations(model, content_path, style_path, num_content_layers):
    # Load our images in
    content_image = load_img(content_path)
    style_image = load_img(style_path)
    # Batch-compute the content and style features
    content_outputs = model(content_image)
    style_outputs = model(style_image)
    # Content layers come first in the model outputs, style layers after them;
    # [0] drops the batch dimension
    style_features = [style_layer[0] for style_layer in style_outputs[num_content_layers:]]
    content_features = [content_layer[0] for content_layer in content_outputs[:num_content_layers]]
    return style_features, content_features
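# load_img is used above but not defined in this gist; a minimal sketch, assuming
# Keras VGG19 preprocessing (the max_dim resize policy is an assumption):
import numpy as np
from PIL import Image
from keras.applications.vgg19 import preprocess_input

def load_img(path, max_dim=512):
    img = Image.open(path)
    scale = max_dim / max(img.size)
    img = img.resize((round(img.size[0] * scale), round(img.size[1] * scale)))
    arr = np.expand_dims(np.array(img, dtype=np.float32), axis=0)
    return preprocess_input(arr)  # RGB -> BGR plus ImageNet mean subtraction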
### Total Loss
def compute_loss(model, loss_weights, generated_output_activations, gram_style_features, content_features, num_content_layers, num_style_layers):
    generated_content_activations = generated_output_activations[:num_content_layers]
    generated_style_activations = generated_output_activations[num_content_layers:]
    style_weight, content_weight = loss_weights
    style_score = 0
    content_score = 0
    # Accumulate style losses from all layers, weighting each layer's contribution equally
    weight_per_style_layer = 1.0 / float(num_style_layers)
    for target_style, comb_style in zip(gram_style_features, generated_style_activations):
        temp = get_style_loss(comb_style[0], target_style)
        style_score += weight_per_style_layer * temp
    # Accumulate content losses from all layers
    weight_per_content_layer = 1.0 / float(num_content_layers)
    for target_content, comb_content in zip(content_features, generated_content_activations):
        temp = get_content_loss(comb_content[0], target_content)
        content_score += weight_per_content_layer * temp
    # Total loss: weighted sum of the style and content terms
    loss = style_weight * style_score + content_weight * content_score
    return loss, style_score, content_score
from keras import backend as K
from keras.applications.vgg19 import VGG19
from keras.models import Model

def get_model(content_layers, style_layers):
    # Build VGG19 without the classifier head; the ImageNet weights are loaded
    # later from vgg_weights, after the variables are initialized
    vgg19 = VGG19(weights=None, include_top=False)
    # We don't need (or want) to train any layers of the pre-trained VGG model, so freeze it
    vgg19.trainable = False
    style_model_outputs = [vgg19.get_layer(name).output for name in style_layers]
    content_model_outputs = [vgg19.get_layer(name).output for name in content_layers]
    # Content outputs first, then style outputs: the order compute_loss expects
    model_outputs = content_model_outputs + style_model_outputs
    # Build a model that returns these intermediate activations
    return Model(inputs=vgg19.input, outputs=model_outputs), vgg19
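# run_style_transfer below reads content_layers, style_layers, num_content_layers,
# num_style_layers, vgg_weights and save_name as globals; they are not defined in
# this gist. A plausible setup with the usual Gatys-style Keras VGG19 layer names
# (the weight-file path and output name are placeholders):
content_layers = ['block5_conv2']
style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
num_content_layers = len(content_layers)
num_style_layers = len(style_layers)
vgg_weights = 'vgg19_weights_notop.h5'  # placeholder path to the VGG19 weights
save_name = 'generated.jpg'             # placeholder output filename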
# Writing the VGG network
import torch.nn.functional as F
from collections import namedtuple

class VGG(nn.Module):
    def __init__(self):  # Could take an optional pooling parameter to choose average or max
        super(VGG, self).__init__()
        ## VGG layers
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_4 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        # Pooling layers: the original paper recommends average pooling
        self.p1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.p2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.p3 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.p4 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.p5 = nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x, out_params=None):
        out = {}
        # Run the input through the VGG stack, caching every activation by name
        out['re11'] = F.relu(self.conv1_1(x))
        out['re12'] = F.relu(self.conv1_2(out['re11']))
        out['p1'] = self.p1(out['re12'])
        h_relu1_2 = out['re12']
        out['re21'] = F.relu(self.conv2_1(out['p1']))
        out['re22'] = F.relu(self.conv2_2(out['re21']))
        out['p2'] = self.p2(out['re22'])
        h_relu2_2 = out['re22']
        out['re31'] = F.relu(self.conv3_1(out['p2']))
        out['re32'] = F.relu(self.conv3_2(out['re31']))
        out['re33'] = F.relu(self.conv3_3(out['re32']))
        out['re34'] = F.relu(self.conv3_4(out['re33']))
        out['p3'] = self.p3(out['re34'])
        h_relu3_3 = out['re33']
        out['re41'] = F.relu(self.conv4_1(out['p3']))
        out['re42'] = F.relu(self.conv4_2(out['re41']))
        out['re43'] = F.relu(self.conv4_3(out['re42']))
        out['re44'] = F.relu(self.conv4_4(out['re43']))
        h_relu4_3 = out['re43']
        out['p4'] = self.p4(out['re44'])
        out['re51'] = F.relu(self.conv5_1(out['p4']))
        out['re52'] = F.relu(self.conv5_2(out['re51']))
        out['re53'] = F.relu(self.conv5_3(out['re52']))
        out['re54'] = F.relu(self.conv5_4(out['re53']))
        out['p5'] = self.p5(out['re54'])
        # If specific layer names were requested, return just those activations
        if out_params is not None:
            return [out[param] for param in out_params]
        vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3'])
        return vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3)
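# Quick shape check of the VGG module above (random weights; for real use the conv
# weights must be copied from a pre-trained VGG-19, the checkpoint path is a placeholder):
vgg = VGG()
# vgg.load_state_dict(torch.load('vgg19_conv.pth'))  # placeholder checkpoint
acts = vgg(torch.randn(1, 3, 256, 256), ['re11', 're31', 're51'])
print([a.shape for a in acts])  # 64, 256 and 512 channels at decreasing resolution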
import numpy as np
from PIL import Image

def run_style_transfer(content_path, style_path, num_iterations=200, content_weight=0.1, style_weight=0.9):
    # Create a TensorFlow session and assign it as the Keras backend session
    sess = tf.Session()
    K.set_session(sess)

    model, vgg19 = get_model(content_layers, style_layers)
    # Get the style and content feature representations (from our specified intermediate layers)
    style_features, content_features = get_feature_representations(model, content_path, style_path, num_content_layers)
    gram_style_features = [gram_matrix(style_feature) for style_feature in style_features]

    # VGG default normalization: the valid pixel range after mean subtraction
    norm_means = np.array([103.939, 116.779, 123.68])
    min_vals = -norm_means
    max_vals = 255 - norm_means

    # In the original paper the initial stylized image is a random matrix with the same size
    # as the content image, but later work initialized from the content image instead of
    # random values, because it proved to stylize faster
    generated_image = load_img(content_path)
    # generated_image = np.random.randint(0, 255, size=generated_image.shape)
    # TensorFlow variable that holds the stylized/generated image during training
    generated_image = tf.Variable(generated_image, dtype=tf.float32)

    model_outputs = model(generated_image)
    # Weights of the style and content terms, i.e. beta and alpha
    loss_weights = (style_weight, content_weight)
    loss = compute_loss(model, loss_weights, model_outputs, gram_style_features, content_features, num_content_layers, num_style_layers)
    # Create our optimizer; only the generated image is trainable
    opt = tf.train.AdamOptimizer(learning_rate=9, beta1=0.9, epsilon=1e-1).minimize(loss[0], var_list=[generated_image])

    sess.run(tf.global_variables_initializer())
    sess.run(generated_image.initializer)
    # Load the weights again, because tf.global_variables_initializer() resets them
    vgg19.load_weights(vgg_weights)

    # Start from an infinite loss and track the best image (the one with minimum loss)
    best_loss, best_img = float('inf'), None
    for i in range(num_iterations):
        # One optimization step
        sess.run(opt)
        # Keep the image inside the min-max range of the VGG normalization;
        # the assign op must be run for the clipped values to take effect
        clipped = tf.clip_by_value(generated_image, min_vals, max_vals)
        sess.run(generated_image.assign(clipped))
        # Unpack the tuple of loss tensors
        total_loss, style_score, content_score = loss
        total_loss = total_loss.eval(session=sess)
        if total_loss < best_loss:
            # Update the best loss and best image
            best_loss = total_loss
            best_img = deprocess_img(generated_image.eval(session=sess))
            print('\nbest: iteration: ', i, ' loss: ', total_loss,
                  ' style_loss: ', style_score.eval(session=sess),
                  ' content_loss: ', content_score.eval(session=sess), '\n')
        # Save the image every 100 iterations
        if (i + 1) % 100 == 0:
            output = Image.fromarray(best_img)
            output.save(str(i + 1) + '-' + save_name)
    # After num_iterations are completed, close the TF session
    sess.close()
    return best_img, best_loss
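# deprocess_img is used above but not defined in this gist; a minimal sketch that
# inverts the VGG19 preprocessing (mean re-addition in BGR order, then back to RGB):
def deprocess_img(processed_img):
    x = processed_img.copy().squeeze(0)  # drop the batch dimension
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    x = x[:, :, ::-1]  # BGR -> RGB
    return np.clip(x, 0, 255).astype('uint8')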
import torch.optim as optim
from torch.autograd import Variable

# Start from a clone of the content image and optimize its pixels directly;
# create it on CUDA so it matches the network and loss modules below
optimImg = Variable(contentImg.data.clone().cuda(), requires_grad=True)
optimizer = optim.LBFGS([optimImg])

# Shifting everything to CUDA (nn.Module.cuda() moves a module in place)
vgg.cuda()
for loss in losses:
    loss.cuda()

# Training
no_iter = 100
for iteration in range(1, no_iter + 1):
    print('Iteration [%d]/[%d]' % (iteration, no_iter))
    def cl():
        # LBFGS closure: recompute the weighted per-layer losses and return the total
        optimizer.zero_grad()
        out = vgg(optimImg, loss_layers)
        totalLossList = []
        for i in range(len(out)):
            layer_output = out[i]
            loss_i = losses[i]
            target_i = targets[i]
            totalLossList.append(loss_i(layer_output, target_i) * weights[i])
        totalLoss = sum(totalLossList)
        totalLoss.backward()
        print('Loss: %f' % totalLoss.item())
        return totalLoss
    optimizer.step(cl)

outImg = optimImg.data[0].cpu()
save_img(outImg.squeeze())
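# save_img is used above but not defined in this gist; a minimal sketch, assuming
# the tensor holds an RGB image scaled to [0, 1] (the scaling convention is a guess):
import torchvision.transforms as transforms

def save_img(tensor, path='out.png'):
    transforms.ToPILImage()(tensor.clamp(0, 1)).save(path)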