from keras import backend as K
from keras import initializers, regularizers, constraints
from keras.engine.topology import Layer


class AttentionWithContext(Layer):
    """
    Attention operation, with a context/query vector, for temporal data.
    Supports masking.

    Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf],
    "Hierarchical Attention Networks for Document Classification",
    by using a context vector to assist the attention.

    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.

    Just put it on top of an RNN layer (GRU/LSTM/SimpleRNN) with return_sequences=True.
    The dimensions are inferred from the output shape of the RNN.

    Example:
        model.add(LSTM(64, return_sequences=True))
        model.add(AttentionWithContext())
    """

    def __init__(self,
                 W_regularizer=None, u_regularizer=None, b_regularizer=None,
                 W_constraint=None, u_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.u_regularizer = regularizers.get(u_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.u_constraint = constraints.get(u_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        super(AttentionWithContext, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3

        self.W = self.add_weight(shape=(input_shape[-1], input_shape[-1]),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight(shape=(input_shape[-1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)

        self.u = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_u'.format(self.name),
                                 regularizer=self.u_regularizer,
                                 constraint=self.u_constraint)

        super(AttentionWithContext, self).build(input_shape)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        # uit = tanh(W . h_t + b): hidden representation of each timestep
        uit = K.dot(x, self.W)
        if self.bias:
            uit += self.b
        uit = K.tanh(uit)

        # ait = u . uit: similarity of each timestep to the context vector
        ait = K.dot(uit, self.u)
        a = K.exp(ait)

        # apply mask after the exp; will be re-normalized next
        if mask is not None:
            # cast the mask to floatX to avoid float64 upcasting in Theano
            a *= K.cast(mask, K.floatx())

        # In some cases, especially early in training, the sum may be almost
        # zero and this results in NaNs. A workaround is to add a very small
        # positive number epsilon to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        # weight each timestep by its attention score and sum over time
        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def get_output_shape_for(self, input_shape):
        # Keras 1 API
        return input_shape[0], input_shape[-1]

    def compute_output_shape(self, input_shape):
        """Shape transformation logic so Keras can infer the output shape (Keras 2 API)."""
        return input_shape[0], input_shape[-1]
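For anyone copying this, here is a minimal, self-contained usage sketch based on the example in the docstring (Keras 2 is assumed; the sequence length, feature size, layer width, and number of classes are illustrative placeholders):

from keras.models import Sequential
from keras.layers import LSTM, Dense

model = Sequential()
# input: sequences of 50 steps with 100 features each (illustrative values)
model.add(LSTM(64, input_shape=(50, 100), return_sequences=True))
# pools over the time axis: (batch, 50, 64) -> (batch, 64)
model.add(AttentionWithContext())
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])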
Just tried rmdort's fork. The issue reported by abali96 disappears! They also added tensor shapes in the comments, which helps to understand what happens under the hood.
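For reference, the change in that fork (as I understand it, so treat the exact code as an assumption rather than the fork's verbatim source) is to route the 3D-tensor-by-vector products through a backend-aware helper, because a bare K.dot between a 3D tensor and a 1D vector fails on the TensorFlow backend:

def dot_product(x, kernel):
    """Dot product between a 3D tensor and a 1D vector that works on both
    the Theano and TensorFlow backends.
    x: tensor of shape (samples, steps, features)
    kernel: vector of shape (features,)
    """
    if K.backend() == 'tensorflow':
        # tf.matmul needs both operands to be at least 2D: expand the vector
        # to (features, 1), multiply, then squeeze back to (samples, steps)
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)

With this helper, ait = K.dot(uit, self.u) in call() becomes ait = dot_product(uit, self.u).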
I am getting:

Traceback (most recent call last):
  File "test.py", line 25, in <module>
    from attention import AttentionWithContext
  File "/ssd/MachineLearning/Python/NLP/SplitAndSpellSentence/attention.py", line 1, in <module>
    class AttentionWithContext(Layer):
NameError: name 'Layer' is not defined
The code is simple:

model = Sequential()
model.add(recurrent.GRU(hidden_neurons, input_shape=(CONFIG.max_input_wordchunk_len, len(chars)),
                        return_sequences=True,
                        kernel_initializer=CONFIG.initialization, activation='linear'))
model.add(AttentionWithContext())
model.add(Dense(len(chars), activation='sigmoid', kernel_initializer=CONFIG.initialization))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
Edit: My bad. from keras.engine.topology import Layer resolved it.
I don't know why, but I'm getting a dimension error.
Code:
def generate_model(output_len, chars=None):
    """Generate the model"""
    print('Building model...')
    chars = chars or CHARS
    in_out_neurons = CONFIG.max_input_len
    hidden_neurons = CONFIG.hidden_size

    model = Sequential()
    model.add(recurrent.GRU(512, input_shape=(128, 100),
                            return_sequences=True,
                            kernel_initializer=CONFIG.initialization, activation='linear'))
    model.add(AttentionWithContext())
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    return model
and the error is

_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
gru_1 (GRU)                  (None, 128, 512)          941568
_________________________________________________________________
attention_with_context_1 (At (None, 512)               263168
=================================================================
Total params: 1,204,736
Trainable params: 1,204,736
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/500
Traceback (most recent call last):
  File "test.py", line 580, in <module>
    train_speller()
  File "test.py", line 482, in train_speller
    itarative_train(model)
  File "test.py", line 467, in itarative_train
    class_weight=None, max_queue_size=10, workers=1)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/models.py", line 1315, in fit_generator
    initial_epoch=initial_epoch)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 2230, in fit_generator
    class_weight=class_weight)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1877, in train_on_batch
    class_weight=class_weight)
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1480, in _standardize_user_data
    exception_prefix='target')
  File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 113, in _standardize_input_data
    'with shape ' + str(data_shape))
ValueError: Error when checking target: expected attention_with_context_1 to have 2 dimensions, but got array with shape (64, 128, 100)
Any idea?
As the output shape is 3D anyway, I tried to change line 81 to
return (input_shape[0], input_shape[1], input_shape[2])
but that only led to a different error, and the model does not compile.
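That change cannot work on its own: compute_output_shape only declares the output shape, while call() still sums over the time axis and returns a 2D tensor, so the declared and actual shapes now disagree. The original ValueError is about the targets, not the model: with this layer the targets must be 2D. A minimal sketch of a matching setup (the Dense head and the target shape are my assumptions about the intent; the sizes are copied from the summary above):

model = Sequential()
model.add(recurrent.GRU(512, input_shape=(128, 100), return_sequences=True))
model.add(AttentionWithContext())            # output: (batch, 512)
model.add(Dense(100, activation='softmax'))  # output: (batch, 100)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
# targets passed to fit/fit_generator must now have shape (batch, 100),
# not (batch, 128, 100); if you need one prediction per timestep, an
# attention layer that pools over time is not the right fit.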
Is the issue "IndexError: pop index out of range" resolved?