@nigeljyng
Forked from cbaziotis/AttentionWithContext.py
Last active February 10, 2021 14:02
Keras Layer that implements an Attention mechanism, with a context/query vector, for temporal data. Supports Masking. Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf] "Hierarchical Attention Networks for Document Classification"
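For reference, the attention computed in call() below corresponds to the word-level attention in Yang et al.: each timestep's hidden state is projected and squashed, scored against a learned context vector, and the normalized scores weight a sum over the timesteps.

$$u_t = \tanh(W h_t + b), \qquad \alpha_t = \frac{\exp(u_t^{\top} u)}{\sum_{t'} \exp(u_{t'}^{\top} u)}, \qquad s = \sum_t \alpha_t h_t$$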
from keras import backend as K
from keras.engine.topology import Layer
from keras import initializations, regularizers, constraints
# NOTE: these are the Keras 1.x module names; under Keras 2 the initializers live in
# keras.initializers (see the TensorFlow / Keras 2 fork linked in the comments below).


class AttentionWithContext(Layer):
    """
    Attention operation, with a context/query vector, for temporal data.
    Supports Masking.
    Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf]
    "Hierarchical Attention Networks for Document Classification"
    by using a context vector to assist the attention.

    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.

    Just put it on top of an RNN layer (GRU/LSTM/SimpleRNN) with return_sequences=True.
    The dimensions are inferred based on the output shape of the RNN.

    Example:
        model.add(LSTM(64, return_sequences=True))
        model.add(AttentionWithContext())
    """

    def __init__(self,
                 W_regularizer=None, u_regularizer=None, b_regularizer=None,
                 W_constraint=None, u_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        self.supports_masking = True
        self.init = initializations.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.u_regularizer = regularizers.get(u_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.u_constraint = constraints.get(u_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        super(AttentionWithContext, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3

        self.W = self.add_weight((input_shape[-1], input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight((input_shape[-1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)

        self.u = self.add_weight((input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_u'.format(self.name),
                                 regularizer=self.u_regularizer,
                                 constraint=self.u_constraint)

        super(AttentionWithContext, self).build(input_shape)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        # uit = tanh(x . W + b): hidden representation of each timestep
        uit = K.dot(x, self.W)
        if self.bias:
            uit += self.b
        uit = K.tanh(uit)

        # ait = uit . u: alignment score of each timestep against the context vector
        ait = K.dot(uit, self.u)
        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases, especially in the early stages of training, the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        # weighted sum over the timesteps, collapsing the time axis
        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def get_output_shape_for(self, input_shape):
        # Keras 1.x name for the shape-inference hook
        return input_shape[0], input_shape[-1]

    def compute_output_shape(self, input_shape):
        """Shape transformation logic so Keras can infer output shape"""
        return (input_shape[0], input_shape[-1])
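A minimal end-to-end sketch of how the layer is meant to be stacked (the vocabulary size, embedding size, sequence length, and output layer below are illustrative placeholders, not values from this gist):

from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

model = Sequential()
# mask_zero=True produces a mask that AttentionWithContext can consume
model.add(Embedding(input_dim=20000, output_dim=128, input_length=100, mask_zero=True))
model.add(LSTM(64, return_sequences=True))   # -> (samples, steps, features)
model.add(AttentionWithContext())            # -> (samples, features)
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])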
@rmdort

rmdort commented May 26, 2017

I have adapted the code for TensorFlow and Keras 2. Here is the fork:
https://gist.github.com/rmdort/596e75e864295365798836d9e8636033

@bicepjai

Is the issue "IndexError: pop index out of range" resolved?

@leocnj

leocnj commented Apr 28, 2018

Just tried rmdort's fork. The issue reported by abali96 disappears! Also, he added tensor shapes in comments and this helps to understand what happens under the hood.

@sekarpdkt

sekarpdkt commented Apr 28, 2018

I am getting:

Traceback (most recent call last):
  File "test.py", line 25, in <module>
    from attention import AttentionWithContext
  File "/ssd/MachineLearning/Python/NLP/SplitAndSpellSentence/attention.py", line 1, in <module>
    class AttentionWithContext(Layer):
NameError: name 'Layer' is not defined

The code is simple:

    model = Sequential()  
    model.add(recurrent.GRU(hidden_neurons, input_shape=( CONFIG.max_input_wordchunk_len, len(chars)), 
                             return_sequences=True, 
                            kernel_initializer=CONFIG.initialization, activation='linear'))
    model.add(AttentionWithContext())
    model.add(Dense(len(chars), activation='sigmoid',kernel_initializer=CONFIG.initialization))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

Edit: My bad. `from keras.engine.topology import Layer` resolved it.

@sekarpdkt

I don't know why, but I am getting a dimension error.
Code:


def generate_model(output_len, chars=None):
    """Generate the model"""
    print('Building model...')
    chars = chars or CHARS

    in_out_neurons = CONFIG.max_input_len  
    hidden_neurons = CONFIG.hidden_size
    
    model = Sequential()  
    model.add(recurrent.GRU(512, input_shape=( 128, 100), 
                             return_sequences=True, 
                            kernel_initializer=CONFIG.initialization, activation='linear'))
    model.add(AttentionWithContext())
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    return model


and the error is:


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
gru_1 (GRU)                  (None, 128, 512)          941568    
_________________________________________________________________
attention_with_context_1 (At (None, 512)               263168    
=================================================================
Total params: 1,204,736
Trainable params: 1,204,736
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/500
Traceback (most recent call last):
  File "test.py", line 580, in <module>
    train_speller()
  File "test.py", line 482, in train_speller
    itarative_train(model)
  File "test.py", line 467, in itarative_train
    class_weight=None, max_queue_size=10, workers=1)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/models.py", line 1315, in fit_generator
    initial_epoch=initial_epoch)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 2230, in fit_generator
    class_weight=class_weight)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1877, in train_on_batch
    class_weight=class_weight)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1480, in _standardize_user_data
    exception_prefix='target')
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 113, in _standardize_input_data
    'with shape ' + str(data_shape))
ValueError: Error when checking target: expected attention_with_context_1 to have 2 dimensions, but got array with shape (64, 128, 100)

Any idea?

As the output shape is 3-dimensional anyway, I tried to change line 81 to

return (input_shape[0], input_shape[1],input_shape[2])

but then I get a different error and the model does not compile.
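For reference, the ValueError above comes from the target array rather than from the layer: AttentionWithContext collapses the time axis, so the model ends in a 2D output `(samples, features)`, while the targets being fed in are 3D `(64, 128, 100)`. Editing `compute_output_shape` only changes the shape Keras infers, not what `call()` actually returns, which is why a different error appears. A hedged sketch of a shape-consistent setup (layer sizes below are placeholders):

from keras.models import Sequential
from keras.layers import GRU, Dense

model = Sequential()
model.add(GRU(512, input_shape=(128, 100), return_sequences=True))
model.add(AttentionWithContext())             # output: (samples, 512)
model.add(Dense(100, activation='softmax'))   # targets must then be (samples, 100)
model.compile(loss='categorical_crossentropy', optimizer='adam')
# If per-timestep targets of shape (samples, 128, 100) are what the task needs,
# an attention layer that removes the time axis is not the right fit.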
