@monk1337 · Created May 12, 2018
import tensorflow as tf


def additive_attention(ref, query, ref_dim, qdim,
                       normalize=False, blend=False):
    """Additive (Bahdanau-style) attention: e_t = Av^T tanh(U.h_t + V.q).

    NOTE: `ref` must be batch-major, i.e. [batch, timesteps, ref_dim];
    `query` is [batch, qdim].
    """
    # infer timesteps dynamically from the reference tensor
    timesteps = tf.shape(ref)[1]
    # projection matrices for the reference and the query, plus the
    # attention vector that reduces each blended state to a scalar score
    U = tf.get_variable('U', shape=[ref_dim, qdim], dtype=tf.float32,
                        initializer=tf.random_uniform_initializer(-0.01, 0.01))
    V = tf.get_variable('V', shape=[qdim, qdim], dtype=tf.float32,
                        initializer=tf.random_uniform_initializer(-0.01, 0.01))
    Av = tf.get_variable('Av', shape=[qdim, 1], dtype=tf.float32,
                         initializer=tf.random_uniform_initializer(-0.01, 0.01))
    # project the reference: collapse batch/time dims to matmul, then expand again
    ref_proj = tf.reshape(
        tf.matmul(tf.reshape(ref, [-1, ref_dim]), U),
        [-1, timesteps, qdim])
    # project the query and add a time axis so it broadcasts across timesteps
    hi = tf.expand_dims(tf.matmul(query, V), axis=1)
    # blend reference and query; the tanh nonlinearity is essential here:
    # without it the query term is constant across timesteps and cancels
    # out of the softmax, leaving the weights independent of the query
    blended = tf.tanh(ref_proj + hi)
    # score each timestep: collapse dims, matmul with the attention vector
    scores = tf.reshape(
        tf.matmul(tf.reshape(blended, [-1, qdim]), Av),
        [-1, timesteps])  # attention scores across timesteps
    # normalize scores into a distribution over timesteps
    probs = tf.nn.softmax(scores)
    if normalize:
        return probs
    if blend:  # reduce the reference based on the attention weights
        return tf.reduce_sum(ref * tf.expand_dims(probs, axis=-1),
                             axis=1)  # reduce across the time dimension
    return scores  # raw (unnormalized) scores
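
A minimal usage sketch, assuming TensorFlow 1.x. The shapes, placeholder names, and the 'attention' scope below are illustrative, not from the original gist; a variable scope (with reuse on repeated calls) is needed because the function creates its weights via tf.get_variable.

# hypothetical dimensions for illustration only
timesteps, ref_dim, qdim = 20, 128, 64
ref = tf.placeholder(tf.float32, [None, timesteps, ref_dim])  # e.g. encoder states
query = tf.placeholder(tf.float32, [None, qdim])              # e.g. decoder state
with tf.variable_scope('attention'):
    # blend=True returns the attention-weighted sum of `ref`
    context = additive_attention(ref, query, ref_dim, qdim, blend=True)
# context has shape [batch, ref_dim]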