import tensorflow as tf
import numpy as np


class ConvolutionalAttentionNLI(object):
    """Sentence-pair NLI classifier: a two-layer 1-D convolutional encoder over frozen
    word embeddings, followed by an attend / compare / aggregate pipeline."""

    def __init__(self, embeddings_shape, target_classes=2, conv_filter_size=3,
                 conv_projection_size=300, attention_output_size=200,
                 comparison_output_size=100, learning_rate=0.05):
        self._embeddings_shape = embeddings_shape
        self._target_classes = target_classes
        self._conv_filter_size = conv_filter_size
        self._conv_projection_size = conv_projection_size
        self._attention_output_size = attention_output_size
        self._comparison_output_size = comparison_output_size
        self._learning_rate = learning_rate
        self._build_graph()
    def _build_graph(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            self._init_embeddings()
            self._init_placeholders()
            self._embeddings_lookup()
            self._convolutional_layer()
            self._attention_layer()
            self._comparison_layer()
            self._aggregation_layer()
            self._classification_layer()
            self._init_optimizer()
            self.metrics = tf.summary.merge_all()
            self.saver = tf.train.Saver(max_to_keep=None)
    def _init_embeddings(self):
        # Word embeddings are kept frozen and loaded through an explicit assign op.
        self._embeddings = tf.Variable(tf.zeros(self._embeddings_shape), name='word_embeddings', trainable=False)
        self.embeddings_placeholder = tf.placeholder(tf.float32, self._embeddings_shape)
        self.embeddings_init_op = self._embeddings.assign(self.embeddings_placeholder)

    def _init_placeholders(self):
        # Token-id matrices [batch, max_len] for both sentences, plus their true lengths.
        self.X1 = tf.placeholder(tf.int32, [None, None])
        self.X2 = tf.placeholder(tf.int32, [None, None])
        self.X1_len = tf.placeholder(tf.int32, [None])
        self.X2_len = tf.placeholder(tf.int32, [None])
        self.targets = tf.placeholder(tf.int64, [None])
        self._targets_onehot = tf.one_hot(self.targets, self._target_classes)
        self.is_training = tf.placeholder(tf.bool)
        self.dropout = tf.placeholder(tf.float32)
    def _embeddings_lookup(self):
        with tf.name_scope('embeddings_lookup'):
            self._X1_embedded = tf.nn.embedding_lookup(self._embeddings, self.X1)
            self._X2_embedded = tf.nn.embedding_lookup(self._embeddings, self.X2)

    def _conv_pad(self, values):
        # Pad one zero timestep on each side so a 'valid' convolution with the default
        # filter size of 3 preserves sequence length; the pad matches the input's own
        # batch and channel dimensions.
        with tf.name_scope('convolutional_padding'):
            pad = tf.zeros_like(values[:, :1, :])
            return tf.concat([pad, values, pad], 1)
    def _convolutional_layer(self):
        # Two stacked 1-D convolutions; weights are shared between the two sentences
        # via name/reuse, with dropout applied after each convolution.
        with tf.name_scope('convolutional_layer'):
            X1_conv_1 = tf.layers.conv1d(
                self._conv_pad(self._X1_embedded),
                self._conv_projection_size,
                self._conv_filter_size,
                padding='valid',
                use_bias=False,
                name='conv_1',
            )
            X2_conv_1 = tf.layers.conv1d(
                self._conv_pad(self._X2_embedded),
                self._conv_projection_size,
                self._conv_filter_size,
                padding='valid',
                use_bias=False,
                name='conv_1',
                reuse=True
            )
            X1_conv_1 = tf.layers.dropout(X1_conv_1, rate=self.dropout, training=self.is_training)
            X2_conv_1 = tf.layers.dropout(X2_conv_1, rate=self.dropout, training=self.is_training)

            X1_conv_2 = tf.layers.conv1d(
                self._conv_pad(X1_conv_1),
                self._conv_projection_size,
                self._conv_filter_size,
                padding='valid',
                use_bias=False,
                name='conv_2',
            )
            X2_conv_2 = tf.layers.conv1d(
                self._conv_pad(X2_conv_1),
                self._conv_projection_size,
                self._conv_filter_size,
                padding='valid',
                use_bias=False,
                name='conv_2',
                reuse=True
            )
            self._X1_conv = tf.layers.dropout(X1_conv_2, rate=self.dropout, training=self.is_training)
            self._X2_conv = tf.layers.dropout(X2_conv_2, rate=self.dropout, training=self.is_training)
    def _attention_layer(self):
        # Soft-align the two sentences: project both with a shared feed-forward layer,
        # score all token pairs, and build context vectors from masked softmax weights.
        with tf.name_scope('attention_layer'):
            e_X1 = tf.layers.dense(self._X1_conv, self._attention_output_size, activation=tf.nn.relu, name='attention_nn')
            e_X2 = tf.layers.dense(self._X2_conv, self._attention_output_size, activation=tf.nn.relu, name='attention_nn', reuse=True)
            e = tf.matmul(e_X1, e_X2, transpose_b=True, name='e')
            # beta: X2 tokens aligned to each X1 token; alpha: X1 tokens aligned to each X2 token.
            self._beta = tf.matmul(self._masked_softmax(e, self.X2_len), self._X2_conv, name='beta')
            self._alpha = tf.matmul(self._masked_softmax(tf.transpose(e, [0, 2, 1]), self.X1_len), self._X1_conv, name='alpha')

    def _masked_softmax(self, values, lengths):
        # Softmax over the last axis that ignores padded positions: scores beyond each
        # sequence's true length are pushed to -inf before normalisation.
        with tf.name_scope('MaskedSoftmax'):
            mask = tf.expand_dims(tf.sequence_mask(lengths, tf.reduce_max(lengths), dtype=tf.float32), -2)
            inf_mask = (1 - mask) * -np.inf
            inf_mask = tf.where(tf.is_nan(inf_mask), tf.zeros_like(inf_mask), inf_mask)
            return tf.nn.softmax(tf.multiply(values, mask) + inf_mask)
    def _comparison_layer(self):
        # Compare each token representation with its aligned context vector through a
        # shared feed-forward layer, then zero out padded positions.
        with tf.name_scope('comparison_layer'):
            X1_comp = tf.layers.dense(
                tf.concat([self._X1_conv, self._beta], 2),
                self._comparison_output_size,
                activation=tf.nn.relu,
                name='comparison_nn'
            )
            self._X1_comp = tf.multiply(
                tf.layers.dropout(X1_comp, rate=self.dropout, training=self.is_training),
                tf.expand_dims(tf.sequence_mask(self.X1_len, tf.reduce_max(self.X1_len), dtype=tf.float32), -1)
            )
            X2_comp = tf.layers.dense(
                tf.concat([self._X2_conv, self._alpha], 2),
                self._comparison_output_size,
                activation=tf.nn.relu,
                name='comparison_nn',
                reuse=True
            )
            self._X2_comp = tf.multiply(
                tf.layers.dropout(X2_comp, rate=self.dropout, training=self.is_training),
                tf.expand_dims(tf.sequence_mask(self.X2_len, tf.reduce_max(self.X2_len), dtype=tf.float32), -1)
            )

    def _aggregation_layer(self):
        # Sum the comparison vectors over time and concatenate both sentence summaries.
        with tf.name_scope('aggregation_layer'):
            X1_agg = tf.reduce_sum(self._X1_comp, 1)
            X2_agg = tf.reduce_sum(self._X2_comp, 1)
            self._agg = tf.concat([X1_agg, X2_agg], 1)
    def _classification_layer(self):
        with tf.name_scope('classifier'):
            L1 = tf.layers.dropout(
                tf.layers.dense(self._agg, 100, activation=tf.nn.relu, name='L1'),
                rate=self.dropout, training=self.is_training
            )
            # Keep the output layer linear; softmax_cross_entropy below expects logits,
            # so the softmax is applied separately for predictions.
            self._logits = tf.layers.dense(L1, self._target_classes, name='y')
            self.y = tf.nn.softmax(self._logits)
            tf.summary.histogram('y', self.y)

    def _init_optimizer(self):
        self.loss = tf.losses.softmax_cross_entropy(self._targets_onehot, self._logits)
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.targets, tf.argmax(self.y, 1)), tf.float32))
        self.optimizer = tf.train.AdagradOptimizer(learning_rate=self._learning_rate).minimize(self.loss)
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('accuracy', self.accuracy)
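
A minimal usage sketch, not part of the gist: it assumes a pre-trained embedding matrix and token-id batches that are zero-padded exactly to the longest sentence in the batch (the length masks use tf.reduce_max over the length vectors), loads the frozen embeddings through embeddings_init_op, and runs a single training step; the toy ids and hyperparameters are illustrative only.

import numpy as np
import tensorflow as tf

embedding_matrix = np.random.rand(20000, 300).astype(np.float32)  # stand-in for real pre-trained vectors
model = ConvolutionalAttentionNLI(embeddings_shape=embedding_matrix.shape)

with model.graph.as_default():
    init_op = tf.global_variables_initializer()

with tf.Session(graph=model.graph) as sess:
    sess.run(init_op)
    # Load the frozen word embeddings through the dedicated assign op.
    sess.run(model.embeddings_init_op, {model.embeddings_placeholder: embedding_matrix})

    # One training step on a toy batch of two sentence pairs, each matrix padded to
    # the batch's longest sentence.
    feed = {
        model.X1: np.array([[1, 2, 3], [4, 5, 0]], dtype=np.int32),
        model.X2: np.array([[6, 7, 0, 0], [8, 9, 10, 11]], dtype=np.int32),
        model.X1_len: [3, 2], model.X2_len: [2, 4],
        model.targets: [0, 1],
        model.is_training: True, model.dropout: 0.2,
    }
    _, loss = sess.run([model.optimizer, model.loss], feed)
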
import tensorflow as tf
import numpy as np


class DecomposableNLI(object):
    """Decomposable-attention NLI model: the same attend / compare / aggregate pipeline
    as above, applied directly to the word embeddings without a convolutional encoder."""

    def __init__(self, embeddings_shape, target_classes=2, attention_output_size=200,
                 comparison_output_size=100, learning_rate=0.0001):
        self._embeddings_shape = embeddings_shape
        self._target_classes = target_classes
        self._attention_output_size = attention_output_size
        self._comparison_output_size = comparison_output_size
        self._learning_rate = learning_rate
        self._build_graph()
    def _build_graph(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            self._init_embeddings()
            self._init_placeholders()
            self._embeddings_lookup()
            self._attention_layer()
            self._comparison_layer()
            self._aggregation_layer()
            self._classification_layer()
            self._init_optimizer()
            self.metrics = tf.summary.merge_all()
            self.saver = tf.train.Saver(max_to_keep=None)
    def _init_embeddings(self):
        # Frozen word embeddings, loaded through an explicit assign op.
        self._embeddings = tf.Variable(tf.zeros(self._embeddings_shape), name='word_embeddings', trainable=False)
        self.embeddings_placeholder = tf.placeholder(tf.float32, self._embeddings_shape)
        self.embeddings_init_op = self._embeddings.assign(self.embeddings_placeholder)

    def _init_placeholders(self):
        self.X1 = tf.placeholder(tf.int32, [None, None])
        self.X2 = tf.placeholder(tf.int32, [None, None])
        self.X1_len = tf.placeholder(tf.int32, [None])
        self.X2_len = tf.placeholder(tf.int32, [None])
        self.targets = tf.placeholder(tf.int64, [None])
        self._targets_onehot = tf.one_hot(self.targets, self._target_classes)
        self.is_training = tf.placeholder(tf.bool)
        self.dropout = tf.placeholder(tf.float32)
    def _embeddings_lookup(self):
        with tf.name_scope('embeddings_lookup'):
            self._X1_embedded = tf.nn.embedding_lookup(self._embeddings, self.X1)
            self._X2_embedded = tf.nn.embedding_lookup(self._embeddings, self.X2)

    def _attention_layer(self):
        # Soft alignment computed directly on the embedded tokens, with the projection
        # layer shared between the two sentences.
        with tf.name_scope('attention_layer'):
            e_X1 = tf.layers.dense(self._X1_embedded, self._attention_output_size, activation=tf.nn.relu, name='attention_nn')
            e_X2 = tf.layers.dense(self._X2_embedded, self._attention_output_size, activation=tf.nn.relu, name='attention_nn', reuse=True)
            e = tf.matmul(e_X1, e_X2, transpose_b=True, name='e')
            self._beta = tf.matmul(self._masked_softmax(e, self.X2_len), self._X2_embedded, name='beta')
            self._alpha = tf.matmul(self._masked_softmax(tf.transpose(e, [0, 2, 1]), self.X1_len), self._X1_embedded, name='alpha')

    def _masked_softmax(self, values, lengths):
        # Softmax over the last axis that ignores padded positions.
        with tf.name_scope('MaskedSoftmax'):
            mask = tf.expand_dims(tf.sequence_mask(lengths, tf.reduce_max(lengths), dtype=tf.float32), -2)
            inf_mask = (1 - mask) * -np.inf
            inf_mask = tf.where(tf.is_nan(inf_mask), tf.zeros_like(inf_mask), inf_mask)
            return tf.nn.softmax(tf.multiply(values, mask) + inf_mask)
    def _comparison_layer(self):
        with tf.name_scope('comparison_layer'):
            X1_comp = tf.layers.dense(
                tf.concat([self._X1_embedded, self._beta], 2),
                self._comparison_output_size,
                activation=tf.nn.relu,
                name='comparison_nn'
            )
            self._X1_comp = tf.multiply(
                tf.layers.dropout(X1_comp, rate=self.dropout, training=self.is_training),
                tf.expand_dims(tf.sequence_mask(self.X1_len, tf.reduce_max(self.X1_len), dtype=tf.float32), -1)
            )
            X2_comp = tf.layers.dense(
                tf.concat([self._X2_embedded, self._alpha], 2),
                self._comparison_output_size,
                activation=tf.nn.relu,
                name='comparison_nn',
                reuse=True
            )
            self._X2_comp = tf.multiply(
                tf.layers.dropout(X2_comp, rate=self.dropout, training=self.is_training),
                tf.expand_dims(tf.sequence_mask(self.X2_len, tf.reduce_max(self.X2_len), dtype=tf.float32), -1)
            )

    def _aggregation_layer(self):
        with tf.name_scope('aggregation_layer'):
            X1_agg = tf.reduce_sum(self._X1_comp, 1)
            X2_agg = tf.reduce_sum(self._X2_comp, 1)
            self._agg = tf.concat([X1_agg, X2_agg], 1)
    def _classification_layer(self):
        with tf.name_scope('classifier'):
            L1 = tf.layers.dropout(
                tf.layers.dense(self._agg, 100, activation=tf.nn.relu, name='L1'),
                rate=self.dropout, training=self.is_training
            )
            # Linear output layer; softmax_cross_entropy below expects logits.
            self._logits = tf.layers.dense(L1, self._target_classes, name='y')
            self.y = tf.nn.softmax(self._logits)
            tf.summary.histogram('y', self.y)

    def _init_optimizer(self):
        self.loss = tf.losses.softmax_cross_entropy(self._targets_onehot, self._logits)
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.targets, tf.argmax(self.y, 1)), tf.float32))
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self._learning_rate).minimize(self.loss)
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('accuracy', self.accuracy)
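
Both models expect each batch to be zero-padded exactly to the length of its longest sentence, since the masks are built with tf.reduce_max over the length vectors. A small helper sketch for producing such batches; pad_batch and the token ids below are illustrative, not part of the gist.

import numpy as np

def pad_batch(token_id_lists, pad_id=0):
    # Pad variable-length id lists into the [batch, max_len] int32 matrix and the
    # length vector that the X1/X2 and X1_len/X2_len placeholders expect.
    lengths = np.array([len(ids) for ids in token_id_lists], dtype=np.int32)
    batch = np.full((len(token_id_lists), lengths.max()), pad_id, dtype=np.int32)
    for i, ids in enumerate(token_id_lists):
        batch[i, :len(ids)] = ids
    return batch, lengths

x1, x1_len = pad_batch([[12, 7, 3], [5, 9]])
# x1 -> [[12 7 3], [5 9 0]], x1_len -> [3 2]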