import torch
import torch.nn as nn

class GramMatrix(nn.Module):
    def forward(self, input):
        b, c, h, w = input.size()
        # Flatten each feature map: b x c x (h*w)
        f = input.view(b, c, h * w)
        # torch.bmm(batch1, batch2): batch1 is b x m x p, batch2 is b x p x n -> b x m x n
        # f: b x c x (h*w), f.transpose(1, 2): b x (h*w) x c -> G: b x c x c
        G = torch.bmm(f, f.transpose(1, 2))
        return G.div_(h * w)

class styleLoss(nn.Module):
    def forward(self, input, target):
        GramInput = GramMatrix()(input)
        return nn.MSELoss()(GramInput, target)

# Style targets: Gram matrices of the style image's features at the style layers
styleTargets = []
for t in vgg(styleImg, style_layers):
    t = t.detach()
    styleTargets.append(GramMatrix()(t))

# Content targets: raw feature maps of the content image at the content layers
contentTargets = []
for t in vgg(contentImg, content_layers):
    t = t.detach()
    contentTargets.append(t)

# One loss module per layer (both modules are stateless, so sharing instances is safe)
style_Losses = [styleLoss()] * len(style_layers)
content_Losses = [nn.MSELoss()] * len(content_layers)
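A minimal sketch of how these pieces fit together at a single style layer. The tensors here are random placeholders standing in for real VGG activations of the generated and style images, not part of the gist itself:

# Hedged usage sketch: `feat` and the second random tensor are hypothetical
# stand-ins for VGG activations at one style layer.
feat = torch.randn(1, 64, 128, 128)                               # b x c x h x w
target_gram = GramMatrix()(torch.randn(1, 64, 128, 128)).detach() # 1 x c x c
loss = styleLoss()(feat, target_gram)  # MSE between the two c x c Gram matrices
print(loss.item())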
import numpy as np
import tensorflow as tf

### Content Loss Function
def get_content_loss(content, target):
    return tf.reduce_mean(tf.square(content - target)) / 2

### Style Loss Function
def gram_matrix(input_tensor):
    # If the input tensor is a 3D array of size Nh x Nw x Nc,
    # reshape it to a 2D array of (Nh*Nw) x Nc
    channels = int(input_tensor.shape[-1])
    a = tf.reshape(input_tensor, [-1, channels])
    # Gram matrix: a^T a, of shape Nc x Nc
    gram = tf.matmul(a, a, transpose_a=True)
    return gram

def get_style_loss(base_style, gram_target):
    height, width, channels = base_style.get_shape().as_list()
    gram_style = gram_matrix(base_style)
    # The original Gatys et al. normalization constant is
    # 1 / (4 * channels**2 * (width * height)**2); a simplified
    # constant (channels**2 * width * height) is used here instead
    return tf.reduce_mean(tf.square(gram_style - gram_target)) / (channels**2 * width * height)

### Pass the content and style images through the model
def get_feature_representations(model, content_path, style_path, num_content_layers):
    # Load our images in
    content_image = load_img(content_path)
    style_image = load_img(style_path)
    # Compute content and style features
    content_outputs = model(content_image)
    style_outputs = model(style_image)
    # Get the style and content feature representations from our model
    # (content layers come first in the model's output list, style layers after)
    style_features = [style_layer[0] for style_layer in style_outputs[num_content_layers:]]
    content_features = [content_layer[0] for content_layer in content_outputs[:num_content_layers]]
    return style_features, content_features

### Total Loss
def compute_loss(model, loss_weights, generated_output_activations, gram_style_features, content_features, num_content_layers, num_style_layers):
    generated_content_activations = generated_output_activations[:num_content_layers]
    generated_style_activations = generated_output_activations[num_content_layers:]
    style_weight, content_weight = loss_weights
    style_score = 0
    content_score = 0
    # Accumulate style losses from all style layers,
    # weighting each layer's contribution equally
    weight_per_style_layer = 1.0 / float(num_style_layers)
    for target_style, comb_style in zip(gram_style_features, generated_style_activations):
        temp = get_style_loss(comb_style[0], target_style)
        style_score += weight_per_style_layer * temp
    # Accumulate content losses from all content layers
    weight_per_content_layer = 1.0 / float(num_content_layers)
    for target_content, comb_content in zip(content_features, generated_content_activations):
        temp = get_content_loss(comb_content[0], target_content)
        content_score += weight_per_content_layer * temp
    # Total loss: weighted sum of the style and content terms
    loss = style_weight * style_score + content_weight * content_score
    return loss, style_score, content_score
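As a quick sanity check on the shapes involved, here is a small sketch (assuming TensorFlow 1.x, as the training code below does). The input tensor is a made-up activation, not from the gist:

# Hypothetical activation: a 32x32 spatial grid with 64 channels
dummy = tf.constant(np.random.rand(32, 32, 64), dtype=tf.float32)
gram = gram_matrix(dummy)
with tf.Session() as s:
    print(s.run(tf.shape(gram)))  # [64, 64]: channels x channels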
from keras.applications.vgg19 import VGG19
from keras.models import Model

def get_model(content_layers, style_layers):
    # Load our model: VGG19 without the classifier head. The pretrained ImageNet
    # weights are loaded separately later (see run_style_transfer), so we build
    # it with weights=None here.
    vgg19 = VGG19(weights=None, include_top=False)
    # We don't need (or want) to train any layers of our pre-trained VGG model,
    # so we set its trainable flag to False
    vgg19.trainable = False
    style_model_outputs = [vgg19.get_layer(name).output for name in style_layers]
    content_model_outputs = [vgg19.get_layer(name).output for name in content_layers]
    # Content outputs first, style outputs after; get_feature_representations relies on this order
    model_outputs = content_model_outputs + style_model_outputs
    # Build a model that maps an input image to all of these intermediate activations
    return Model(inputs=vgg19.input, outputs=model_outputs), vgg19
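The gist does not show how content_layers and style_layers are defined. A common choice, following Gatys et al. and using Keras's VGG19 layer names, would be something like the sketch below; the gist itself defines these lists elsewhere:

# Assumed layer choices (not from the gist), using Keras VGG19 layer names
content_layers = ['block5_conv2']
style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1',
                'block4_conv1', 'block5_conv1']
num_content_layers = len(content_layers)
num_style_layers = len(style_layers)

model, vgg19 = get_model(content_layers, style_layers)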
# Writing the VGG network
from collections import namedtuple

import torch.nn as nn
import torch.nn.functional as F

class VGG(nn.Module):
    def __init__(self):  # Could take an optional pooling parameter to switch between average and max
        super(VGG, self).__init__()
        ## VGG-19 convolutional layers
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_4 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        # Pooling layers: the original paper recommends average pooling
        self.p1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.p2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.p3 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.p4 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.p5 = nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x, out_params=None):
        out = {}
        # Build up the VGG activations, keeping every intermediate result by name
        out['re11'] = F.relu(self.conv1_1(x))
        out['re12'] = F.relu(self.conv1_2(out['re11']))
        out['p1'] = self.p1(out['re12'])
        h_relu1_2 = out['re12']
        out['re21'] = F.relu(self.conv2_1(out['p1']))
        out['re22'] = F.relu(self.conv2_2(out['re21']))
        out['p2'] = self.p2(out['re22'])
        h_relu2_2 = out['re22']
        out['re31'] = F.relu(self.conv3_1(out['p2']))
        out['re32'] = F.relu(self.conv3_2(out['re31']))
        out['re33'] = F.relu(self.conv3_3(out['re32']))
        out['re34'] = F.relu(self.conv3_4(out['re33']))
        out['p3'] = self.p3(out['re34'])
        h_relu3_3 = out['re33']
        out['re41'] = F.relu(self.conv4_1(out['p3']))
        out['re42'] = F.relu(self.conv4_2(out['re41']))
        out['re43'] = F.relu(self.conv4_3(out['re42']))
        out['re44'] = F.relu(self.conv4_4(out['re43']))
        h_relu4_3 = out['re43']
        out['p4'] = self.p4(out['re44'])
        out['re51'] = F.relu(self.conv5_1(out['p4']))
        out['re52'] = F.relu(self.conv5_2(out['re51']))
        out['re53'] = F.relu(self.conv5_3(out['re52']))
        out['re54'] = F.relu(self.conv5_4(out['re53']))
        out['p5'] = self.p5(out['re54'])
        # If specific layer names were requested, return just those activations
        if out_params is not None:
            return [out[param] for param in out_params]
        # Otherwise return the four standard perceptual-loss activations
        vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3'])
        out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3)
        return out
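A sketch of how this module serves the loss code above. Loading pretrained VGG-19 weights is not shown in the gist, so the load_state_dict line is a hypothetical placeholder, and the layer-name choices are the ones commonly used with Gatys-style transfer:

vgg = VGG()
# Hypothetical: copy pretrained VGG-19 weights in from a matching state dict
# vgg.load_state_dict(torch.load('vgg19_weights.pth'))
vgg.eval()

# Assumed layer choices, keyed by the names in the `out` dict above
style_layers = ['re11', 're21', 're31', 're41', 're51']
content_layers = ['re42']
features = vgg(torch.randn(1, 3, 256, 256), style_layers + content_layers)
print([f.shape for f in features])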
from PIL import Image
from keras import backend as K

def run_style_transfer(content_path, style_path, num_iterations=200, content_weight=0.1, style_weight=0.9):
    # Create a TensorFlow session and hand it to the Keras backend
    sess = tf.Session()
    K.set_session(sess)

    model, vgg19 = get_model(content_layers, style_layers)

    # Get the style and content feature representations (from our specified intermediate layers)
    style_features, content_features = get_feature_representations(model, content_path, style_path, num_content_layers)
    gram_style_features = [gram_matrix(style_feature) for style_feature in style_features]

    # VGG's default per-channel normalization; pixel values must stay in this range
    norm_means = np.array([103.939, 116.779, 123.68])
    min_vals = -norm_means
    max_vals = 255 - norm_means

    # The original paper initializes the stylized image with random noise of the
    # same size as the content image, but later work initializes from the content
    # image instead, since that was found to converge faster
    generated_image = load_img(content_path)
    # generated_image = np.random.randint(0, 255, size=generated_image.shape)

    # TensorFlow variable that holds the stylized/generated image during training
    generated_image = tf.Variable(generated_image, dtype=tf.float32)
    model_outputs = model(generated_image)

    # Weights of the style and content terms, i.e. alpha and beta
    loss_weights = (style_weight, content_weight)

    # Build the loss and the optimizer
    loss = compute_loss(model, loss_weights, model_outputs, gram_style_features, content_features, num_content_layers, num_style_layers)
    opt = tf.train.AdamOptimizer(learning_rate=9, beta1=0.9, epsilon=1e-1).minimize(loss[0], var_list=[generated_image])

    sess.run(tf.global_variables_initializer())
    sess.run(generated_image.initializer)
    # Load the VGG weights again, because tf.global_variables_initializer() resets them
    vgg19.load_weights(vgg_weights)

    # Start with an infinite loss and track the best image seen so far (the one with minimum loss)
    best_loss, best_img = float('inf'), None
    # Build the clipping op once, outside the loop, so the graph does not grow every iteration
    clip_op = generated_image.assign(tf.clip_by_value(generated_image, min_vals, max_vals))
    for i in range(num_iterations):
        # One optimization step
        sess.run(opt)
        # Keep image values inside the valid range of the VGG normalization;
        # the assign op must actually be run in the session to take effect
        sess.run(clip_op)
        # Unpack the tuple of loss tensors
        total_loss, style_score, content_score = loss
        total_loss = total_loss.eval(session=sess)
        if total_loss < best_loss:
            # Update the best loss and best image
            best_loss = total_loss
            best_img = deprocess_img(generated_image.eval(session=sess))
            print('\nbest: iteration: ', i, ' loss: ', total_loss,
                  ' style_loss: ', style_score.eval(session=sess),
                  ' content_loss: ', content_score.eval(session=sess), '\n')
        # Save the best image every 100 iterations
        if (i + 1) % 100 == 0:
            output = Image.fromarray(best_img)
            output.save(str(i + 1) + '-' + save_name)
    # After all iterations are completed, close the TF session
    sess.close()
    return best_img, best_loss
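A minimal invocation sketch. vgg_weights and save_name are module-level globals that the function above assumes; the values and image paths here are hypothetical placeholders, as the gist defines them elsewhere:

# Hypothetical globals and paths, not from the gist
vgg_weights = 'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5'
save_name = 'stylized.jpg'

best_img, best_loss = run_style_transfer('content.jpg', 'style.jpg',
                                         num_iterations=200,
                                         content_weight=0.1, style_weight=0.9)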
import torch.optim as optim
from torch.autograd import Variable

# Optimize a copy of the content image; move it to the GPU before building the
# optimizer, so the optimizer operates on the CUDA tensor
optimImg = Variable(contentImg.data.clone().cuda(), requires_grad=True)
optimizer = optim.LBFGS([optimImg])

# Shift the loss modules to CUDA (reassigning the loop variable alone
# would not update the list, so rebuild it)
losses = [loss.cuda() for loss in losses]

# Training
no_iter = 100
for iteration in range(1, no_iter + 1):
    print('Iteration [%d]/[%d]' % (iteration, no_iter))
    # LBFGS requires a closure that re-evaluates the loss and its gradients
    def cl():
        optimizer.zero_grad()
        out = vgg(optimImg, loss_layers)
        totalLossList = []
        for i in range(len(out)):
            layer_output = out[i]
            loss_i = losses[i]
            target_i = targets[i]
            totalLossList.append(loss_i(layer_output, target_i) * weights[i])
        totalLoss = sum(totalLossList)
        totalLoss.backward()
        print('Loss: %f' % totalLoss.item())
        return totalLoss
    optimizer.step(cl)

outImg = optimImg.data[0].cpu()
save_img(outImg.squeeze())
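The loop above assumes combined loss_layers, losses, targets, and weights lists built from the style and content pieces defined in the earlier PyTorch snippet. A sketch of that wiring, with purely illustrative per-layer weight values:

# Assumed wiring (not shown in the gist); the weight values are placeholders
loss_layers = style_layers + content_layers
losses = style_Losses + content_Losses
targets = styleTargets + contentTargets
weights = [1.0] * len(style_layers) + [1.0] * len(content_layers)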