
@monk1337
Created May 12, 2018 14:00
import tensorflow as tf


def attention(inputs, attention_size, time_major=False, return_alphas=False):
    if isinstance(inputs, tuple):
        # In case of a Bi-RNN, concatenate the forward and backward outputs along the feature axis.
        inputs = tf.concat(inputs, 2)

    if time_major:
        # (T, B, D) => (B, T, D)
        inputs = tf.transpose(inputs, [1, 0, 2])

    sequence_length = inputs.shape[1].value  # T - length of sequences processed in the antecedent RNN layer
    hidden_size = inputs.shape[2].value      # D - hidden size of the RNN layer

    # Trainable parameters of the attention mechanism
    W_omega = tf.Variable(tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    # One-layer MLP applied to every time step: v has shape (B*T, attention_size)
    v = tf.tanh(tf.matmul(tf.reshape(inputs, [-1, hidden_size]), W_omega) + tf.reshape(b_omega, [1, -1]))

    # Score each time step against the context vector u_omega, then softmax over time
    vu = tf.matmul(v, tf.reshape(u_omega, [-1, 1]))
    exps = tf.reshape(tf.exp(vu), [-1, sequence_length])
    alphas = exps / tf.reshape(tf.reduce_sum(exps, 1), [-1, 1])

    # Output of the (Bi-)RNN is reduced to a weighted sum over time using the attention weights
    output = tf.reduce_sum(inputs * tf.reshape(alphas, [-1, sequence_length, 1]), 1)

    if not return_alphas:
        return output
    else:
        return output, alphas
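
A minimal usage sketch, assuming TensorFlow 1.x and a bidirectional GRU over placeholder inputs; the placeholder names, sizes, and cell choice below are illustrative and not part of the original gist:

# Hypothetical inputs: batch of sequences with a statically known time dimension,
# since attention() reads sequence_length from the static shape.
batch_inputs = tf.placeholder(tf.float32, [None, 50, 128])   # (B, T, D_in)
seq_lengths = tf.placeholder(tf.int32, [None])

fw_cell = tf.nn.rnn_cell.GRUCell(64)
bw_cell = tf.nn.rnn_cell.GRUCell(64)
rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
    fw_cell, bw_cell, batch_inputs, sequence_length=seq_lengths, dtype=tf.float32)

# rnn_outputs is a (forward, backward) tuple; attention() concatenates it internally.
attended, alphas = attention(rnn_outputs, attention_size=50, return_alphas=True)
# attended has shape (B, 2*64); alphas has shape (B, T).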