@nigeljyng
Forked from cbaziotis/AttentionWithContext.py
Last active February 10, 2021 14:02
Keras Layer that implements an Attention mechanism, with a context/query vector, for temporal data. Supports Masking. Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf] "Hierarchical Attention Networks for Document Classification"
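Concretely, for RNN hidden states h_t the layer computes, following Yang et al. (W, b, and u are the layer's learned weights, u being the context vector):

    u_t = \tanh(W h_t + b)
    \alpha_t = \frac{\exp(u_t^\top u)}{\sum_k \exp(u_k^\top u)}
    s = \sum_t \alpha_t h_t

The output s, the attention-weighted sum of the hidden states, is what collapses the time dimension from the input.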
from keras import backend as K
from keras import initializers, regularizers, constraints
from keras.engine.topology import Layer


class AttentionWithContext(Layer):
    """
    Attention operation, with a context/query vector, for temporal data.
    Supports Masking.
    Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf]
    "Hierarchical Attention Networks for Document Classification"
    by using a context vector to assist the attention.

    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.

    Just put it on top of an RNN layer (GRU/LSTM/SimpleRNN) with
    return_sequences=True. The dimensions are inferred based on the output
    shape of the RNN.

    Example:
        model.add(LSTM(64, return_sequences=True))
        model.add(AttentionWithContext())
    """

    def __init__(self,
                 W_regularizer=None, u_regularizer=None, b_regularizer=None,
                 W_constraint=None, u_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.u_regularizer = regularizers.get(u_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.u_constraint = constraints.get(u_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        super(AttentionWithContext, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3

        self.W = self.add_weight(name='{}_W'.format(self.name),
                                 shape=(input_shape[-1], input_shape[-1]),
                                 initializer=self.init,
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight(name='{}_b'.format(self.name),
                                     shape=(input_shape[-1],),
                                     initializer='zero',
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)

        self.u = self.add_weight(name='{}_u'.format(self.name),
                                 shape=(input_shape[-1],),
                                 initializer=self.init,
                                 regularizer=self.u_regularizer,
                                 constraint=self.u_constraint)

        super(AttentionWithContext, self).build(input_shape)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        # project each timestep: u_t = tanh(W . h_t + b)
        uit = K.dot(x, self.W)
        if self.bias:
            uit += self.b
        uit = K.tanh(uit)

        # score each timestep against the learned context vector u
        # (note: K.dot with a 1-D kernel can fail on the TensorFlow backend;
        # some forks use K.squeeze(K.dot(uit, K.expand_dims(self.u)), -1))
        ait = K.dot(uit, self.u)
        a = K.exp(ait)

        # apply the mask after the exp; re-normalized next
        if mask is not None:
            # cast the mask to floatX to avoid float64 upcasting in Theano
            a *= K.cast(mask, K.floatx())

        # in some cases, especially early in training, the sum may be almost
        # zero and produce NaNs; adding a small epsilon to the sum works
        # around this.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        # attention-weighted sum over the time axis
        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def get_output_shape_for(self, input_shape):
        return input_shape[0], input_shape[-1]

    def compute_output_shape(self, input_shape):
        """Shape transformation logic so Keras can infer the output shape."""
        return input_shape[0], input_shape[-1]
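For reference, a minimal end-to-end sketch of the intended usage on a toy classification model; the vocabulary size, sequence length, embedding dimension, and class count below are made-up placeholders:

from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

vocab_size, max_len, embed_dim, n_classes = 20000, 100, 128, 5  # hypothetical sizes

model = Sequential()
model.add(Embedding(vocab_size, embed_dim, input_length=max_len, mask_zero=True))
model.add(LSTM(64, return_sequences=True))   # attention needs the full sequence
model.add(AttentionWithContext())            # (samples, steps, 64) -> (samples, 64)
model.add(Dense(n_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

Because the layer consumes the mask (compute_mask returns None), padded timesteps are excluded from the attention sum but no mask is propagated to the Dense head.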
@sekarpdkt

I don't know why, but I'm getting a dimension error.
Code:


def generate_model(output_len, chars=None):
    """Generate the model"""
    print('Building model...')
    chars = chars or CHARS

    in_out_neurons = CONFIG.max_input_len
    hidden_neurons = CONFIG.hidden_size

    model = Sequential()
    model.add(recurrent.GRU(512, input_shape=(128, 100),
                            return_sequences=True,
                            kernel_initializer=CONFIG.initialization,
                            activation='linear'))
    model.add(AttentionWithContext())
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    return model


and the error is


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
gru_1 (GRU)                  (None, 128, 512)          941568    
_________________________________________________________________
attention_with_context_1 (At (None, 512)               263168    
=================================================================
Total params: 1,204,736
Trainable params: 1,204,736
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/500
Traceback (most recent call last):
  File "test.py", line 580, in <module>
    train_speller()
  File "test.py", line 482, in train_speller
    itarative_train(model)
  File "test.py", line 467, in itarative_train
    class_weight=None, max_queue_size=10, workers=1)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/models.py", line 1315, in fit_generator
    initial_epoch=initial_epoch)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 2230, in fit_generator
    class_weight=class_weight)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1877, in train_on_batch
    class_weight=class_weight)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1480, in _standardize_user_data
    exception_prefix='target')
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 113, in _standardize_input_data
    'with shape ' + str(data_shape))
ValueError: Error when checking target: expected attention_with_context_1 to have 2 dimensions, but got array with shape (64, 128, 100)

Any idea?

As the output shape is 3-dim anyway, I tried changing line 81 to

return (input_shape[0], input_shape[1], input_shape[2])

but then I got a different error and the model would not compile.
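For anyone hitting the same error: the layer genuinely sums over the time axis in call, so its output is 2-D (samples, features), while the generator here yields 3-D targets of shape (64, 128, 100); declaring a 3-D shape in compute_output_shape cannot fix that mismatch. A sketch of one way to make the shapes agree, assuming the task can be posed as one 100-way prediction per sample (Dense comes from keras.layers):

model.add(recurrent.GRU(512, input_shape=(128, 100), return_sequences=True))
model.add(AttentionWithContext())            # collapses time: (batch, 512)
model.add(Dense(100, activation='softmax'))  # 2-D output matches 2-D targets (batch, 100)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

If per-timestep outputs are actually needed, this attention layer is not the right fit, since it is designed to produce a single vector per sequence.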
