@mcleonard
Last active April 9, 2021 23:59
Example of implementing a super basic attention layer in numpy
import numpy as np


def softmax(x, axis=0):
    """ Calculate the softmax function for an array x.

    axis=0 calculates softmax across rows, which means each column sums to 1.
    axis=1 calculates softmax across columns, which means each row sums to 1.
    """
    return np.exp(x) / np.expand_dims(np.sum(np.exp(x), axis=axis), axis)

def attention(encoder_vectors, decoder_vector):
    """ Example function that calculates attention and returns the context vector.

    Arguments:
        encoder_vectors: NxM numpy array, where N is the number of vectors and M is the vector length
        decoder_vector: 1xM numpy array, M is the vector length, must be the same M as encoder_vectors
    """
    # First, calculate the dot product of each encoder vector with the decoder vector
    dot_prod = np.matmul(encoder_vectors, decoder_vector.T)
    # Then take the softmax of those dot products to get a weight distribution
    scores = softmax(dot_prod)
    # Use those weights to scale encoder_vectors to get the alignment vectors
    alignment = encoder_vectors * scores
    # Sum up the alignment vectors to get the context vector and return it
    context = alignment.sum(axis=0)
    return context

# Fake vectors used as an example
encoder_vectors = np.random.randn(10, 128)
decoder_vector = np.random.randn(1, 128)
context_vector = attention(encoder_vectors, decoder_vector)
print(context_vector)
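
As a quick sanity check (a sketch added here, not part of the original gist), the attention weights computed inside the function should sum to 1, and the context vector should match numpy's built-in weighted average of the encoder vectors. The weights and expected variables below are purely illustrative.

# Sanity check (illustrative): recompute the attention weights, confirm they
# sum to 1, and compare the context vector against np.average with those weights.
weights = softmax(np.matmul(encoder_vectors, decoder_vector.T))
print(weights.sum())  # should print ~1.0
expected = np.average(encoder_vectors, axis=0, weights=weights.flatten())
print(np.allclose(context_vector, expected))  # should print True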