MSWon/Channelwise_SelfAttention.py

## Channelwise_SelfAttention.py
# -*- coding: utf-8 -*-
"""
Created on Sat Apr  6 10:47:30 2019

@author: jbk48
"""

from keras.layers import Layer
import tensorflow as tf


class SelfAttention(Layer):
    """Self-attention across the spatial positions of a 4-D feature map.

    The input of shape ``(batch_size, size1, size2, channel)`` is flattened
    to ``(batch_size, size1*size2, channel)``. Every position is passed
    through a learned dense projection (kernel + bias, ``channel`` units),
    scored against every position of the unprojected input, normalised with
    a softmax over the last axis, and the resulting score matrix is used to
    take a weighted sum of the input positions. The output has the same
    shape as the input.

    Args:
        initializer: Initializer for the projection kernel. Anything that
            ``Layer.add_weight`` accepts (a Keras initializer name or a
            callable). ``None`` (the default) selects Glorot/Xavier uniform.
        **kwargs: Forwarded unchanged to ``keras.layers.Layer``.
    """

    def __init__(self, initializer=None, **kwargs):
        # The default is resolved here rather than in the signature: building
        # a ``tf.contrib`` initializer as a default argument ran at class
        # definition time and made the module un-importable without
        # ``tf.contrib``.
        self.initializer = 'glorot_uniform' if initializer is None else initializer
        super(SelfAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        """Record the input dimensions and create the projection weights.

        Args:
            input_shape: ``(batch_size, size1, size2, channel)``.

        Raises:
            ValueError: If ``size1``, ``size2`` or ``channel`` is undefined.
        """
        # input_shape : (batch_size, size1, size2, channel)
        self.channel = input_shape[-1]
        self.size1 = input_shape[1]
        self.size2 = input_shape[2]
        if self.size1 is None or self.size2 is None or self.channel is None:
            raise ValueError(
                'SelfAttention needs fully defined size1, size2 and channel '
                'dimensions, got input_shape={}'.format(input_shape))

        # Weights are created through add_weight so that Keras tracks them
        # (trainable_weights, saving/loading) and reuses them on every call.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(self.channel, self.channel),
                                      initializer=self.initializer,
                                      trainable=True)
        self.bias = self.add_weight(name='bias',
                                    shape=(self.channel,),
                                    initializer='zeros',
                                    trainable=True)
        super(SelfAttention, self).build(input_shape)

    def call(self, inputs):
        """Apply the attention and return a tensor shaped like ``inputs``."""
        inputs_reshape = tf.reshape(inputs, (-1, self.size1 * self.size2, self.channel))  ## (batch_size, size1*size2, channel)
        inputs_transpose = tf.transpose(inputs_reshape, [0, 2, 1])  ## (batch_size, channel, size1*size2)
        # Dense projection of the last axis: x . kernel + bias.
        r1 = tf.tensordot(inputs_reshape, self.kernel, axes=[[2], [0]]) + self.bias  ## (batch_size, size1*size2, channel)
        r2 = tf.matmul(r1, inputs_transpose)  ## (batch_size, size1*size2, size1*size2)
        score_matrix = tf.nn.softmax(r2, axis=2)  ## (batch_size, size1*size2, size1*size2)
        outputs = tf.matmul(score_matrix, inputs_reshape)  ## (batch_size, size1*size2, channel)
        outputs = tf.reshape(outputs, (-1, self.size1, self.size2, self.channel))  ## (batch_size, size1, size2, channel)
        return outputs

    def compute_output_shape(self, input_shape):
        """Return ``input_shape``: the layer preserves the input shape."""
        return input_shape
	# -*- coding: utf-8 -*-
	"""
	Created on Sat Apr 6 10:47:30 2019

	@author: jbk48
	"""

	from keras.layers import Layer
	import tensorflow as tf


	class SelfAttention(Layer):
		"""Self-attention across the spatial positions of a 4-D feature map.

		The input of shape ``(batch_size, size1, size2, channel)`` is
		flattened to ``(batch_size, size1*size2, channel)``. Every position
		is passed through a learned dense projection (kernel + bias,
		``channel`` units), scored against every position of the unprojected
		input, normalised with a softmax over the last axis, and the score
		matrix is used to take a weighted sum of the input positions. The
		output has the same shape as the input.

		Args:
			initializer: Initializer for the projection kernel. Anything
				that ``Layer.add_weight`` accepts (a Keras initializer name
				or a callable). ``None`` (the default) selects
				Glorot/Xavier uniform.
			**kwargs: Forwarded unchanged to ``keras.layers.Layer``.
		"""

		def __init__(self, initializer=None, **kwargs):
			# Resolve the default lazily: a ``tf.contrib`` initializer built
			# in the signature runs at class definition time and fails
			# wherever ``tf.contrib`` is unavailable.
			self.initializer = 'glorot_uniform' if initializer is None else initializer
			super(SelfAttention, self).__init__(**kwargs)

		def build(self, input_shape):
			"""Record the input dimensions and create the projection weights.

			Args:
				input_shape: ``(batch_size, size1, size2, channel)``.

			Raises:
				ValueError: If ``size1``, ``size2`` or ``channel`` is undefined.
			"""
			# input_shape : (batch_size, size1, size2, channel)
			self.channel = input_shape[-1]
			self.size1 = input_shape[1]
			self.size2 = input_shape[2]
			if self.size1 is None or self.size2 is None or self.channel is None:
				raise ValueError(
					'SelfAttention needs fully defined size1, size2 and channel '
					'dimensions, got input_shape={}'.format(input_shape))

			# Created through add_weight so Keras tracks the weights
			# (trainable_weights, saving/loading) and reuses them per call.
			self.kernel = self.add_weight(name='kernel',
										  shape=(self.channel, self.channel),
										  initializer=self.initializer,
										  trainable=True)
			self.bias = self.add_weight(name='bias',
										shape=(self.channel,),
										initializer='zeros',
										trainable=True)
			super(SelfAttention, self).build(input_shape)

		def call(self, inputs):
			"""Apply the attention and return a tensor shaped like ``inputs``."""
			inputs_reshape = tf.reshape(inputs, (-1, self.size1 * self.size2, self.channel))  ## (batch_size, size1*size2, channel)
			inputs_transpose = tf.transpose(inputs_reshape, [0, 2, 1])  ## (batch_size, channel, size1*size2)
			# Dense projection of the last axis: x . kernel + bias.
			r1 = tf.tensordot(inputs_reshape, self.kernel, axes=[[2], [0]]) + self.bias  ## (batch_size, size1*size2, channel)
			r2 = tf.matmul(r1, inputs_transpose)  ## (batch_size, size1*size2, size1*size2)
			score_matrix = tf.nn.softmax(r2, axis=2)  ## (batch_size, size1*size2, size1*size2)
			outputs = tf.matmul(score_matrix, inputs_reshape)  ## (batch_size, size1*size2, channel)
			outputs = tf.reshape(outputs, (-1, self.size1, self.size2, self.channel))  ## (batch_size, size1, size2, channel)
			return outputs

		def compute_output_shape(self, input_shape):
			"""Return ``input_shape``: the layer preserves the input shape."""
			return input_shape