|
# Keras imports
|
from keras.engine import Layer |
|
from keras.layers import ( |
|
K, |
|
Activation, |
|
Bidirectional, |
|
Dense, |
|
Dropout, |
|
Embedding, |
|
Flatten, |
|
GRU, |
|
Input, |
|
SpatialDropout1D, |
|
) |
|
from keras.models import Model |
|
|
|
# CapsNet hyper-parameters
LEN_GRU = 128  # units of the GRU wrapped by Bidirectional in build_model
N_ROUTINGS = 5  # routing iterations requested for the Capsule layer
N_CAPS = 10  # number of output capsules
DIM_CAPS = 16  # size of each output capsule vector
DROPOUT_PROBA = 0.3  # GRU input/recurrent dropout and the Dropout before the Dense head
DROP_DENSE_RATE = 0.3  # SpatialDropout1D rate applied to the embedding output
|
|
|
|
|
def squash(x, axis=-1):
    """Rescale ``x`` to (approximately) unit L2 norm along ``axis``.

    NOTE: this is a simplified stand-in for the capsule "squash"
    non-linearity. The CapsNet paper (Sabour et al., 2017, Eq. 1) also
    multiplies by ``||x||^2 / (1 + ||x||^2)`` so that short vectors shrink
    towards zero; this version only normalises the length.

    Args:
        x: backend tensor to normalise.
        axis: axis along which the squared norm is summed (default: last).

    Returns:
        A tensor shaped like ``x``, divided by ``sqrt(sum(x**2) + epsilon)``.
    """
    squared_norm = K.sum(K.square(x), axis, keepdims=True)
    # Adding epsilon inside the sqrt keeps the division finite for all-zero
    # vectors.
    scale_factor = K.sqrt(squared_norm + K.epsilon())
    return x / scale_factor
|
|
|
|
|
class Capsule(Layer):
    """Capsule layer with iterative routing between input and output capsules.

    Every input vector is linearly projected into ``n_caps`` prediction
    vectors of size ``dim_caps``; the predictions are then combined over
    ``n_routings`` routing iterations.

    Args:
        n_caps: number of output capsules.
        dim_caps: dimension of each output capsule vector.
        n_routings: number of routing iterations; must be at least 1.
        activation: ``"default"`` uses the module-level ``squash`` function;
            any other value is wrapped in a Keras ``Activation`` layer.
        share_weights: if True a single projection kernel is shared by all
            input positions (``K.conv1d``); otherwise every position gets its
            own kernel (``K.local_conv1d``).
        kernel_size: stored on the instance; not used by the computation.
        **kwargs: forwarded unchanged to ``Layer.__init__``.

    Raises:
        ValueError: if ``n_routings`` is smaller than 1.

    Input is indexed as ``(batch, input_n_caps, input_dim_caps)``; output is
    ``(batch, n_caps, dim_caps)``.
    """

    def __init__(
        self,
        n_caps,
        dim_caps,
        n_routings=3,
        activation="default",
        share_weights=True,
        kernel_size=(9, 1),
        **kwargs
    ):
        super(Capsule, self).__init__(**kwargs)
        # With zero iterations call() would never assign its result, so fail
        # early with a clear message instead.
        if n_routings < 1:
            raise ValueError(
                "n_routings must be >= 1, got {!r}".format(n_routings)
            )
        self.n_caps = n_caps
        self.dim_caps = dim_caps
        self.n_routings = n_routings
        self.share_weights = share_weights
        self.kernel_size = kernel_size

        if activation == "default":
            self.activation = squash
        else:
            self.activation = Activation(activation)

    def build(self, input_shape):
        """Create the projection kernel ``self.W`` for the given input shape."""
        super(Capsule, self).build(input_shape)
        input_dim_caps = input_shape[-1]

        # Shared weights: one kernel reused at every input position.
        # Otherwise: one kernel per input capsule (second-to-last axis).
        n_kernels = 1 if self.share_weights else input_shape[-2]

        # The weight name must not contain spaces: TensorFlow rejects such
        # names for graph variables/ops.
        self.W = self.add_weight(
            name="capsule_kernel",
            shape=(n_kernels, input_dim_caps, self.n_caps * self.dim_caps),
            initializer="glorot_uniform",
            trainable=True,
        )

    def call(self, u_vecs):
        """Project the inputs and run ``n_routings`` routing iterations.

        Args:
            u_vecs: input tensor, indexed as (batch, input_n_caps, input_dim).

        Returns:
            Tensor of output capsules with shape (batch, n_caps, dim_caps).
        """
        batch_size = K.shape(u_vecs)[0]
        input_n_caps = K.shape(u_vecs)[1]

        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        # Split the projected channels into (n_caps, dim_caps) and move the
        # output-capsule axis forward:
        # (batch, input_n_caps, n_caps, dim_caps)
        #   -> (batch, n_caps, input_n_caps, dim_caps)
        u_hat_vecs = K.reshape(
            u_hat_vecs, (batch_size, input_n_caps, self.n_caps, self.dim_caps)
        )
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))

        # Routing logits, one per (output capsule, input capsule) pair.
        h = K.zeros_like(u_hat_vecs[:, :, :, 0])
        for i in range(self.n_routings):
            # Softmax over the output-capsule axis: move it last, apply the
            # softmax, then move it back. ``h`` itself is left untouched.
            c = K.permute_dimensions(
                K.softmax(K.permute_dimensions(h, (0, 2, 1))), (0, 2, 1)
            )
            outs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
            # The logits are only needed if another iteration follows.
            if i < self.n_routings - 1:
                h = K.batch_dot(outs, u_hat_vecs, [2, 3])

        return outs

    def compute_output_shape(self, input_shape):
        """Return the static output shape ``(None, n_caps, dim_caps)``."""
        return (None, self.n_caps, self.dim_caps)

    # Backward-compatible alias for the original method name, which Keras
    # never calls (the shape-inference hook is ``compute_output_shape``).
    compute_out_shape = compute_output_shape
|
|
|
|
|
def build_model(embedding, sequence_length):
    """Build and compile the BiGRU + Capsule binary classifier.

    Pipeline: frozen embedding -> spatial dropout -> bidirectional GRU
    (returning sequences) -> Capsule -> flatten -> dropout -> 1-unit sigmoid.

    Args:
        embedding: 2-D embedding matrix; ``shape[0]`` is used as the
            vocabulary size and ``shape[1]`` as the embedding dimension.
            It initialises the Embedding layer, which is not trainable.
        sequence_length: length of the input sequences.

    Returns:
        A Keras ``Model`` compiled with binary cross-entropy loss, the Adam
        optimizer and the accuracy metric.
    """
    input1 = Input(shape=(sequence_length,))

    embedded = Embedding(
        embedding.shape[0],
        embedding.shape[1],
        weights=[embedding],
        trainable=False,
    )(input1)
    embedded = SpatialDropout1D(DROP_DENSE_RATE)(embedded)

    encoder = Bidirectional(
        GRU(
            LEN_GRU,
            activation="relu",
            dropout=DROPOUT_PROBA,
            recurrent_dropout=DROPOUT_PROBA,
            return_sequences=True,
        )
    )
    x = encoder(embedded)

    # The constructor parameter is ``n_routings``; an unknown ``routings``
    # keyword would be forwarded to Layer.__init__ and rejected.
    capsule = Capsule(
        n_caps=N_CAPS,
        dim_caps=DIM_CAPS,
        n_routings=N_ROUTINGS,
        share_weights=True,
    )(x)

    capsule = Flatten()(capsule)
    capsule = Dropout(DROPOUT_PROBA)(capsule)
    output = Dense(1, activation="sigmoid")(capsule)

    model = Model(inputs=input1, outputs=output)
    model.compile(
        loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"]
    )
    return model