import torch
import torch.nn as nn

class GramMatrix(nn.Module):
    def forward(self, input):
        b, c, h, w = input.size()
        # Flatten each feature map: b x c x (h*w)
        f = input.view(b, c, h * w)
        # torch.bmm(batch1, batch2): batch1 is b x m x p, batch2 is b x p x n -> b x m x n
        # f: b x c x (h*w), f.transpose(1, 2): b x (h*w) x c -> G: b x c x c
        G = torch.bmm(f, f.transpose(1, 2))
        return G.div_(h * w)

class styleLoss(nn.Module):
    def forward(self, input, target):
        GramInput = GramMatrix()(input)
        return nn.MSELoss()(GramInput, target)

# Style targets: Gram matrices of the style image's features at the style layers
styleTargets = []
for t in vgg(styleImg, style_layers):
    t = t.detach()
    styleTargets.append(GramMatrix()(t))

# Content targets: raw feature maps of the content image at the content layers
contentTargets = []
for t in vgg(contentImg, content_layers):
    t = t.detach()
    contentTargets.append(t)

# One loss module per layer (both modules are stateless, so sharing instances is safe)
style_Losses = [styleLoss()] * len(style_layers)
content_Losses = [nn.MSELoss()] * len(content_layers)
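A minimal sketch of how these pieces fit together at a single style layer. The tensors here are random placeholders standing in for real VGG activations of the generated and style images, not part of the gist itself:

# Hedged usage sketch: `feat` and the second random tensor are hypothetical
# stand-ins for VGG activations at one style layer.
feat = torch.randn(1, 64, 128, 128)                               # b x c x h x w
target_gram = GramMatrix()(torch.randn(1, 64, 128, 128)).detach() # 1 x c x c
loss = styleLoss()(feat, target_gram)  # MSE between the two c x c Gram matrices
print(loss.item())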
import numpy as np
import tensorflow as tf

### Content Loss Function
def get_content_loss(content, target):
    return tf.reduce_mean(tf.square(content - target)) / 2

### Style Loss Function
def gram_matrix(input_tensor):
    # If the input tensor is a 3D array of size Nh x Nw x Nc,
    # reshape it to a 2D array of (Nh*Nw) x Nc
    channels = int(input_tensor.shape[-1])
    a = tf.reshape(input_tensor, [-1, channels])
    # Gram matrix: a^T a, of shape Nc x Nc
    gram = tf.matmul(a, a, transpose_a=True)
    return gram

def get_style_loss(base_style, gram_target):
    height, width, channels = base_style.get_shape().as_list()
    gram_style = gram_matrix(base_style)
    # The original Gatys et al. normalization constant is
    # 1 / (4 * channels**2 * (width * height)**2); a simplified
    # constant (channels**2 * width * height) is used here instead
    return tf.reduce_mean(tf.square(gram_style - gram_target)) / (channels**2 * width * height)

### Pass the content and style images through the model
def get_feature_representations(model, content_path, style_path, num_content_layers):
    # Load our images in
    content_image = load_img(content_path)
    style_image = load_img(style_path)
    # Compute content and style features
    content_outputs = model(content_image)
    style_outputs = model(style_image)
    # Get the style and content feature representations from our model
    # (content layers come first in the model's output list, style layers after)
    style_features = [style_layer[0] for style_layer in style_outputs[num_content_layers:]]
    content_features = [content_layer[0] for content_layer in content_outputs[:num_content_layers]]
    return style_features, content_features

### Total Loss
def compute_loss(model, loss_weights, generated_output_activations, gram_style_features, content_features, num_content_layers, num_style_layers):
    generated_content_activations = generated_output_activations[:num_content_layers]
    generated_style_activations = generated_output_activations[num_content_layers:]
    style_weight, content_weight = loss_weights
    style_score = 0
    content_score = 0
    # Accumulate style losses from all style layers,
    # weighting each layer's contribution equally
    weight_per_style_layer = 1.0 / float(num_style_layers)
    for target_style, comb_style in zip(gram_style_features, generated_style_activations):
        temp = get_style_loss(comb_style[0], target_style)
        style_score += weight_per_style_layer * temp
    # Accumulate content losses from all content layers
    weight_per_content_layer = 1.0 / float(num_content_layers)
    for target_content, comb_content in zip(content_features, generated_content_activations):
        temp = get_content_loss(comb_content[0], target_content)
        content_score += weight_per_content_layer * temp
    # Total loss: weighted sum of the style and content terms
    loss = style_weight * style_score + content_weight * content_score
    return loss, style_score, content_score
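As a quick sanity check on the shapes involved, here is a small sketch (assuming TensorFlow 1.x, as the training code below does). The input tensor is a made-up activation, not from the gist:

# Hypothetical activation: a 32x32 spatial grid with 64 channels
dummy = tf.constant(np.random.rand(32, 32, 64), dtype=tf.float32)
gram = gram_matrix(dummy)
with tf.Session() as s:
    print(s.run(tf.shape(gram)))  # [64, 64]: channels x channels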
from keras.applications.vgg19 import VGG19
from keras.models import Model

def get_model(content_layers, style_layers):
    # Load our model: VGG19 without the classifier head. The pretrained ImageNet
    # weights are loaded separately later (see run_style_transfer), so we build
    # it with weights=None here.
    vgg19 = VGG19(weights=None, include_top=False)
    # We don't need (or want) to train any layers of our pre-trained VGG model,
    # so we set its trainable flag to False
    vgg19.trainable = False
    style_model_outputs = [vgg19.get_layer(name).output for name in style_layers]
    content_model_outputs = [vgg19.get_layer(name).output for name in content_layers]
    # Content outputs first, style outputs after; get_feature_representations relies on this order
    model_outputs = content_model_outputs + style_model_outputs
    # Build a model that maps an input image to all of these intermediate activations
    return Model(inputs=vgg19.input, outputs=model_outputs), vgg19
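The gist does not show how content_layers and style_layers are defined. A common choice, following Gatys et al. and using Keras's VGG19 layer names, would be something like the sketch below; the gist itself defines these lists elsewhere:

# Assumed layer choices (not from the gist), using Keras VGG19 layer names
content_layers = ['block5_conv2']
style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1',
                'block4_conv1', 'block5_conv1']
num_content_layers = len(content_layers)
num_style_layers = len(style_layers)

model, vgg19 = get_model(content_layers, style_layers)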
# Writing the VGG network
from collections import namedtuple

import torch.nn as nn
import torch.nn.functional as F

class VGG(nn.Module):
    def __init__(self):  # Could take an optional pooling parameter to switch between average and max
        super(VGG, self).__init__()
        ## VGG-19 convolutional layers
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_4 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        # Pooling layers: the original paper recommends average pooling
        self.p1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.p2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.p3 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.p4 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.p5 = nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x, out_params=None):
        out = {}
        # Build up the VGG activations, keeping every intermediate result by name
        out['re11'] = F.relu(self.conv1_1(x))
        out['re12'] = F.relu(self.conv1_2(out['re11']))
        out['p1'] = self.p1(out['re12'])
        h_relu1_2 = out['re12']
        out['re21'] = F.relu(self.conv2_1(out['p1']))
        out['re22'] = F.relu(self.conv2_2(out['re21']))
        out['p2'] = self.p2(out['re22'])
        h_relu2_2 = out['re22']
        out['re31'] = F.relu(self.conv3_1(out['p2']))
        out['re32'] = F.relu(self.conv3_2(out['re31']))
        out['re33'] = F.relu(self.conv3_3(out['re32']))
        out['re34'] = F.relu(self.conv3_4(out['re33']))
        out['p3'] = self.p3(out['re34'])
        h_relu3_3 = out['re33']
        out['re41'] = F.relu(self.conv4_1(out['p3']))
        out['re42'] = F.relu(self.conv4_2(out['re41']))
        out['re43'] = F.relu(self.conv4_3(out['re42']))
        out['re44'] = F.relu(self.conv4_4(out['re43']))
        h_relu4_3 = out['re43']
        out['p4'] = self.p4(out['re44'])
        out['re51'] = F.relu(self.conv5_1(out['p4']))
        out['re52'] = F.relu(self.conv5_2(out['re51']))
        out['re53'] = F.relu(self.conv5_3(out['re52']))
        out['re54'] = F.relu(self.conv5_4(out['re53']))
        out['p5'] = self.p5(out['re54'])
        # If specific layer names were requested, return just those activations
        if out_params is not None:
            return [out[param] for param in out_params]
        # Otherwise return the four standard perceptual-loss activations
        vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3'])
        out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3)
        return out
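A sketch of how this module serves the loss code above. Loading pretrained VGG-19 weights is not shown in the gist, so the load_state_dict line is a hypothetical placeholder, and the layer-name choices are the ones commonly used with Gatys-style transfer:

vgg = VGG()
# Hypothetical: copy pretrained VGG-19 weights in from a matching state dict
# vgg.load_state_dict(torch.load('vgg19_weights.pth'))
vgg.eval()

# Assumed layer choices, keyed by the names in the `out` dict above
style_layers = ['re11', 're21', 're31', 're41', 're51']
content_layers = ['re42']
features = vgg(torch.randn(1, 3, 256, 256), style_layers + content_layers)
print([f.shape for f in features])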
from PIL import Image
from keras import backend as K

def run_style_transfer(content_path, style_path, num_iterations=200, content_weight=0.1, style_weight=0.9):
    # Create a TensorFlow session and hand it to the Keras backend
    sess = tf.Session()
    K.set_session(sess)

    model, vgg19 = get_model(content_layers, style_layers)

    # Get the style and content feature representations (from our specified intermediate layers)
    style_features, content_features = get_feature_representations(model, content_path, style_path, num_content_layers)
    gram_style_features = [gram_matrix(style_feature) for style_feature in style_features]

    # VGG's default per-channel normalization; pixel values must stay in this range
    norm_means = np.array([103.939, 116.779, 123.68])
    min_vals = -norm_means
    max_vals = 255 - norm_means

    # The original paper initializes the stylized image with random noise of the
    # same size as the content image, but later work initializes from the content
    # image instead, since that was found to converge faster
    generated_image = load_img(content_path)
    # generated_image = np.random.randint(0, 255, size=generated_image.shape)

    # TensorFlow variable that holds the stylized/generated image during training
    generated_image = tf.Variable(generated_image, dtype=tf.float32)
    model_outputs = model(generated_image)

    # Weights of the style and content terms, i.e. alpha and beta
    loss_weights = (style_weight, content_weight)

    # Build the loss and the optimizer
    loss = compute_loss(model, loss_weights, model_outputs, gram_style_features, content_features, num_content_layers, num_style_layers)
    opt = tf.train.AdamOptimizer(learning_rate=9, beta1=0.9, epsilon=1e-1).minimize(loss[0], var_list=[generated_image])

    sess.run(tf.global_variables_initializer())
    sess.run(generated_image.initializer)
    # Load the VGG weights again, because tf.global_variables_initializer() resets them
    vgg19.load_weights(vgg_weights)

    # Start with an infinite loss and track the best image seen so far (the one with minimum loss)
    best_loss, best_img = float('inf'), None
    # Build the clipping op once, outside the loop, so the graph does not grow every iteration
    clip_op = generated_image.assign(tf.clip_by_value(generated_image, min_vals, max_vals))
    for i in range(num_iterations):
        # One optimization step
        sess.run(opt)
        # Keep image values inside the valid range of the VGG normalization;
        # the assign op must actually be run in the session to take effect
        sess.run(clip_op)
        # Unpack the tuple of loss tensors
        total_loss, style_score, content_score = loss
        total_loss = total_loss.eval(session=sess)
        if total_loss < best_loss:
            # Update the best loss and best image
            best_loss = total_loss
            best_img = deprocess_img(generated_image.eval(session=sess))
            print('\nbest: iteration: ', i, ' loss: ', total_loss,
                  ' style_loss: ', style_score.eval(session=sess),
                  ' content_loss: ', content_score.eval(session=sess), '\n')
        # Save the best image every 100 iterations
        if (i + 1) % 100 == 0:
            output = Image.fromarray(best_img)
            output.save(str(i + 1) + '-' + save_name)
    # After all iterations are completed, close the TF session
    sess.close()
    return best_img, best_loss
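A minimal invocation sketch. vgg_weights and save_name are module-level globals that the function above assumes; the values and image paths here are hypothetical placeholders, as the gist defines them elsewhere:

# Hypothetical globals and paths, not from the gist
vgg_weights = 'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5'
save_name = 'stylized.jpg'

best_img, best_loss = run_style_transfer('content.jpg', 'style.jpg',
                                         num_iterations=200,
                                         content_weight=0.1, style_weight=0.9)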
import torch.optim as optim
from torch.autograd import Variable

# Optimize a copy of the content image; move it to the GPU before building the
# optimizer, so the optimizer operates on the CUDA tensor
optimImg = Variable(contentImg.data.clone().cuda(), requires_grad=True)
optimizer = optim.LBFGS([optimImg])

# Shift the loss modules to CUDA (reassigning the loop variable alone
# would not update the list, so rebuild it)
losses = [loss.cuda() for loss in losses]

# Training
no_iter = 100
for iteration in range(1, no_iter + 1):
    print('Iteration [%d]/[%d]' % (iteration, no_iter))
    # LBFGS requires a closure that re-evaluates the loss and its gradients
    def cl():
        optimizer.zero_grad()
        out = vgg(optimImg, loss_layers)
        totalLossList = []
        for i in range(len(out)):
            layer_output = out[i]
            loss_i = losses[i]
            target_i = targets[i]
            totalLossList.append(loss_i(layer_output, target_i) * weights[i])
        totalLoss = sum(totalLossList)
        totalLoss.backward()
        print('Loss: %f' % totalLoss.item())
        return totalLoss
    optimizer.step(cl)

outImg = optimImg.data[0].cpu()
save_img(outImg.squeeze())
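The loop above assumes combined loss_layers, losses, targets, and weights lists built from the style and content pieces defined in the earlier PyTorch snippet. A sketch of that wiring, with purely illustrative per-layer weight values:

# Assumed wiring (not shown in the gist); the weight values are placeholders
loss_layers = style_layers + content_layers
losses = style_Losses + content_Losses
targets = styleTargets + contentTargets
weights = [1.0] * len(style_layers) + [1.0] * len(content_layers)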