@eliorc
Created July 7, 2019 07:27
DME
from typing import List, Optional

import tensorflow as tf


class DynamicMetaEmbedding(tf.keras.layers.Layer):
    def __init__(self,
                 embedding_matrices: List[tf.keras.layers.Embedding],
                 output_dim: Optional[int] = None,
                 name: str = 'dynamic_meta_embedding',
                 **kwargs):
        """
        :param embedding_matrices: List of embedding layers
        :param output_dim: Dimension of the output embedding
        :param name: Layer name
        """
        super().__init__(name=name, **kwargs)

        # Validate that all embedding matrices have the same vocabulary size
        if len(set(e.input_dim for e in embedding_matrices)) != 1:
            raise ValueError('Vocabulary sizes (first dimension) of all embedding matrices must match')

        # If no output_dim is supplied, use the minimum dimension among the given matrices
        self.output_dim = output_dim or min(e.output_dim for e in embedding_matrices)

        self.embedding_matrices = embedding_matrices
        self.n_embeddings = len(self.embedding_matrices)

        # One linear projection per embedding, mapping it into the shared output space
        self.projections = [tf.keras.layers.Dense(units=self.output_dim,
                                                  activation=None,
                                                  name='projection_{}'.format(i),
                                                  dtype=self.dtype) for i, e in enumerate(self.embedding_matrices)]

        # Scalar attention score for each projected embedding
        self.attention = tf.keras.layers.Dense(units=1,
                                               activation=None,
                                               name='attention',
                                               dtype=self.dtype)

    def call(self, inputs, **kwargs) -> tf.Tensor:
        batch_size, time_steps = inputs.shape[:2]

        # Embedding lookup
        embedded = [e(inputs) for e in self.embedding_matrices]  # List of shape=(batch_size, time_steps, channels_i)

        # Project each embedding into the shared output space
        projected = tf.reshape(tf.concat([p(e) for p, e in zip(self.projections, embedded)], axis=-1),
                               shape=(batch_size, time_steps, -1, self.output_dim),
                               name='projected')  # shape=(batch_size, time_steps, n_embeddings, output_dim)

        # Calculate attention coefficients over the embeddings axis
        alphas = self.attention(projected)  # shape=(batch_size, time_steps, n_embeddings, 1)
        alphas = tf.nn.softmax(alphas, axis=-2)  # shape=(batch_size, time_steps, n_embeddings, 1)

        # Attend: attention-weighted sum of the projected embeddings
        output = tf.squeeze(tf.matmul(
            tf.transpose(projected, perm=[0, 1, 3, 2]), alphas),  # Attending
            name='output')  # shape=(batch_size, time_steps, output_dim)

        return output
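
Below is a minimal usage sketch, assuming two pre-built tf.keras.layers.Embedding layers over the same vocabulary; the layer names and the vocabulary/embedding sizes (10000, 300 and 50) are illustrative, not part of the gist.

import tensorflow as tf

# Two embedding layers over the same vocabulary (hypothetical sizes)
glove_like = tf.keras.layers.Embedding(input_dim=10000, output_dim=300)
fasttext_like = tf.keras.layers.Embedding(input_dim=10000, output_dim=50)

# output_dim defaults to min(300, 50) = 50
dme = DynamicMetaEmbedding([glove_like, fasttext_like])

# A batch of token ids, shape=(batch_size, time_steps)
token_ids = tf.random.uniform((32, 20), maxval=10000, dtype=tf.int32)

meta_embedded = dme(token_ids)  # shape=(32, 20, 50)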
@sadakmed

sadakmed commented Apr 10, 2021

        self.output_dim = output_dim or min([e.output_dim for e in embedding_matrices])

Don't you think this one should be max, not min?

And what about this one?

        self.projections = [tf.keras.layers.Dense(units=self.output_dim,
                                                  activation=None,
                                                  name='projection_{}'.format(i),
                                                  dtype=self.dtype) for i in range(self.n_embeddings)]

@eliorc
Author

eliorc commented Apr 11, 2021

Actually, I thought about it, but I figured it is better to project to lower dimensions than to higher ones.

Anyway, I appreciate that you got here :)
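
For concreteness, a small worked illustration of the min-vs-max trade-off discussed above, using two hypothetical embedding dimensions (300 and 50) that are not taken from the gist:

embedding_dims = [300, 50]

# min (current behaviour): both embeddings are projected down to 50 dimensions
output_dim_min = min(embedding_dims)  # 50

# max (the suggestion): the 50-dimensional embedding would be projected up to 300
output_dim_max = max(embedding_dims)  # 300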
