Textual entailment training using TensorFlow.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import urllib
import sys
import os
import zipfile
glove_vectors_file = "glove.6B.50d.txt"
snli_dev_file = "snli_1.0_dev.txt"
snli_full_dataset_file = "snli_1.0_train.txt"
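# The data files above are not fetched by this script. A minimal download
# sketch using the imports above (URLs assumed correct; verify before use):
# urllib.request.urlretrieve(
#     "", "")
# with zipfile.ZipFile("", "r") as z:
#     z.extract(glove_vectors_file)
# The SNLI files come from the corresponding snli_1.0 archive at
# This gist reads the smaller dev split for
# training speed; snli_full_dataset_file points at the full training set.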
glove_wordmap = {}
with open(glove_vectors_file, "r", encoding="utf-8") as glove:
    for line in glove:
        name, vector = tuple(line.split(" ", 1))
        glove_wordmap[name] = np.fromstring(vector, sep=" ")
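# A quick sanity check on the loaded embeddings (illustrative): every entry
# in glove.6B.50d.txt is a 50-dimensional vector, so a common word such as
# "the" should map to a length-50 array.
assert glove_wordmap["the"].shape == (50,)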
def sentence2sequence(sentence):
    """Turn a sentence into a list of GloVe vectors (rows) and the matched
    words, greedily taking the longest prefix of each token that exists
    in the vocabulary."""
    tokens = sentence.lower().split(" ")
    rows = []
    words = []
    # Greedy search for tokens
    for token in tokens:
        i = len(token)
        while len(token) > 0 and i > 0:
            word = token[:i]
            if word in glove_wordmap:
                rows.append(glove_wordmap[word])
                words.append(word)
                token = token[i:]
                i = len(token)
            else:
                i = i - 1
    return rows, words
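# Illustrative usage: in-vocabulary tokens map one-to-one, while an unknown
# token is greedily split into the longest prefixes that GloVe does know,
# so a made-up compound like "bigred" can come back as ["big", "red"]:
# rows, words = sentence2sequence("the big red ball")
# len(rows) == len(words) == 4, each row a 50-d GloVe vector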
rnn_size = 64
rnn = tf.contrib.rnn.BasicRNNCell(rnn_size)
#Constants setup
max_hypothesis_length, max_evidence_length = 30, 30
batch_size, vector_size, hidden_size = 128, 50, 64
lstm_size = hidden_size
weight_decay = 0.0001
learning_rate = 1
input_p, output_p = 0.5, 0.5
training_iterations_count = 100000
display_step = 10
def score_setup(row):
    convert_dict = {
        'entailment': 0,
        'neutral': 1,
        'contradiction': 2
    }
    score = np.zeros((3,))
    for x in range(1, 6):
        tag = row["label" + str(x)]
        if tag in convert_dict:
            score[convert_dict[tag]] += 1
    return score / (1.0 * np.sum(score))
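# Illustrative example with a hypothetical SNLI row where four of the five
# annotator labels are "entailment" and one is "neutral":
# score_setup({"label1": "entailment", "label2": "entailment",
#              "label3": "entailment", "label4": "entailment",
#              "label5": "neutral"})
# -> array([0.8, 0.2, 0. ])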
def fit_to_size(matrix, shape):
    res = np.zeros(shape)
    # Use a tuple of slices; indexing with a list of slices is deprecated
    # in recent numpy versions.
    slices = tuple(slice(0, min(dim, shape[e])) for e, dim in enumerate(matrix.shape))
    res[slices] = matrix[slices]
    return res
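# Illustrative example: a 2x3 matrix fit to shape (3, 2) is cropped to its
# first two columns and zero-padded with a third row:
# fit_to_size(np.array([[1, 2, 3],
#                       [4, 5, 6]]), (3, 2))
# -> array([[1., 2.],
#           [4., 5.],
#           [0., 0.]])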
def split_data_into_scores():
    import csv
    with open(snli_dev_file, "r") as data:
        train = csv.DictReader(data, delimiter='\t')
        evi_sentences = []
        hyp_sentences = []
        labels = []
        scores = []
        # Per the comments further down, the evidence is the first sentence
        # and the hypothesis is the second.
        for row in train:
            evi_sentences.append(np.vstack(
                sentence2sequence(row["sentence1"].lower())[0]))
            hyp_sentences.append(np.vstack(
                sentence2sequence(row["sentence2"].lower())[0]))
            labels.append(row["gold_label"])
            scores.append(score_setup(row))

        hyp_sentences = np.stack([fit_to_size(x, (max_hypothesis_length, vector_size))
                                  for x in hyp_sentences])
        evi_sentences = np.stack([fit_to_size(x, (max_evidence_length, vector_size))
                                  for x in evi_sentences])

        return (hyp_sentences, evi_sentences), labels, np.array(scores)
data_feature_list, correct_values, correct_scores = split_data_into_scores()
l_h, l_e = max_hypothesis_length, max_evidence_length
N, D, H = batch_size, vector_size, hidden_size
l_seq = l_h + l_e
lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
lstm_drop = tf.contrib.rnn.DropoutWrapper(lstm, input_p, output_p)
# N: The number of elements in each of our batches,
#    which we use to train subsets of data for efficiency's sake.
# l_h: The maximum length of a hypothesis (the second sentence). This is
#    used because training an RNN is extraordinarily difficult without
#    unrolling it to a fixed length.
# l_e: The maximum length of a piece of evidence (the first sentence),
#    fixed to the same length for the same reason.
# D: The dimensionality of the word vectors (50 for the GloVe vectors here).
hyp = tf.placeholder(tf.float32, [N, l_h, D], 'hypothesis')
evi = tf.placeholder(tf.float32, [N, l_e, D], 'evidence')
y = tf.placeholder(tf.float32, [N, 3], 'label')
# hyp: Where the hypotheses will be stored during training.
# evi: Where the evidences will be stored during training.
# y: Where correct scores will be stored during training.
# lstm_size: the size of the gates in the LSTM,
# as in the first LSTM layer's initialization.
lstm_back = tf.contrib.rnn.BasicLSTMCell(lstm_size)
# lstm_back: The LSTM used for looking backwards
# through the sentences, similar to lstm.
# input_p: the probability that inputs to the LSTM will be retained at each
# iteration of dropout.
# output_p: the probability that outputs from the LSTM will be retained at
# each iteration of dropout.
lstm_drop_back = tf.contrib.rnn.DropoutWrapper(lstm_back, input_p, output_p)
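# Note: lstm_drop and lstm_drop_back are built here, but the
# static_bidirectional_rnn call below is wired to the plain lstm and
# lstm_back cells, so dropout is never applied. To enable it during
# training, pass the dropout wrappers to static_bidirectional_rnn instead.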
# lstm_drop_back: A dropout wrapper for lstm_back, like lstm_drop.
fc_initializer = tf.random_normal_initializer(stddev=0.1)
# fc_initializer: initial values for the fully connected layer's weights.
# hidden_size: the size of the outputs from each lstm layer.
# Multiplied by 2 to account for the two LSTMs.
fc_weight = tf.get_variable('fc_weight', [2*hidden_size, 3],
                            initializer=fc_initializer)
# fc_weight: Storage for the fully connected layer's weights.
fc_bias = tf.get_variable('bias', [3])
# fc_bias: Storage for the fully connected layer's bias.
# tf.GraphKeys.REGULARIZATION_LOSSES: A key to a collection in the graph
# designated for losses due to regularization.
# In this case, this portion of loss is regularization on the weights
# for the fully connected layer.
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                     tf.nn.l2_loss(fc_weight))
x = tf.concat([hyp, evi], 1) # N, (Lh+Le), d
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2]) # (Le+Lh), N, d
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, vector_size]) # (Le+Lh)*N, d
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(x, l_seq)
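# Illustrative numpy analogue of the three transformations above, with toy
# sizes (N=2, 3 steps, D=4): the (2, 3, 4) batch becomes a list of 3
# step-major arrays of shape (2, 4), the format static_bidirectional_rnn
# expects.
demo = np.arange(24).reshape(2, 3, 4)
demo = demo.transpose(1, 0, 2).reshape(-1, 4)  # (3*2, 4)
demo = np.split(demo, 3)                       # list of 3 arrays, each (2, 4)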
# x: the inputs to the bidirectional_rnn
# tf.contrib.rnn.static_bidirectional_rnn: Runs the input through
# two recurrent networks, one that runs the inputs forward and one
# that runs the inputs in reversed order, combining the outputs.
rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm, lstm_back,
                                                            x, dtype=tf.float32)
# rnn_outputs: the outputs of the bidirectional RNN, one tensor per time step.
# What we want is the last output, rnn_outputs[-1]
classification_scores = tf.matmul(rnn_outputs[-1], fc_weight) + fc_bias
# The scores are relative certainties for how likely the output matches
# a certain entailment:
# 0: Positive entailment
# 1: Neutral entailment
# 2: Negative entailment
with tf.variable_scope('Accuracy'):
    predicts = tf.cast(tf.argmax(classification_scores, 1), 'int32')
    y_label = tf.cast(tf.argmax(y, 1), 'int32')
    corrects = tf.equal(predicts, y_label)
    num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
    accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))
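# Illustrative numpy check of the accuracy computation above: predicted
# classes [0, 2] against true classes [0, 1] give accuracy 0.5.
toy_scores = np.array([[2.0, 0.1, 0.3],
                       [0.2, 0.1, 1.5]])
toy_labels = np.array([[1, 0, 0],
                       [0, 1, 0]])
toy_acc = np.mean(np.argmax(toy_scores, 1) == np.argmax(toy_labels, 1))  # 0.5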
with tf.variable_scope("loss"):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
logits = classification_scores, labels = y)
loss = tf.reduce_mean(cross_entropy)
total_loss = loss + weight_decay * tf.add_n(
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
opt_op = optimizer.minimize(total_loss)
# Initialize variables
init = tf.global_variables_initializer()
# Use TQDM if installed
tqdm_installed = False
try:
    from tqdm import tqdm
    tqdm_installed = True
except ImportError:
    pass
# Launch the Tensorflow session
sess = tf.Session()
writer = tf.summary.FileWriter('./log', sess.graph)  # write the graph to file
merge_op = tf.summary.merge_all()  # operation to merge all summaries
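# Note: no tf.summary.* ops have been registered at this point, so
# merge_all() returns None; the accuracy summary in the training loop
# below is built manually with tf.Summary instead.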
# training_iterations_count: The number of data pieces to train on in total
# batch_size: The number of data pieces per batch
training_iterations = range(0,training_iterations_count,batch_size)
if tqdm_installed:
    # Add a progress bar if TQDM is installed
    training_iterations = tqdm(training_iterations)

for i in training_iterations:
    # Select indices for a random data subset
    batch = np.random.randint(data_feature_list[0].shape[0], size=batch_size)
    # Use the selected subset indices to initialize the graph's
    # placeholder values
    hyps, evis, ys = (data_feature_list[0][batch, :],
                      data_feature_list[1][batch, :],
                      correct_scores[batch])
    # Run the optimization with these initialized values[opt_op], feed_dict={hyp: hyps, evi: evis, y: ys})
    # display_step: how often the accuracy and loss should
    # be tested and displayed.
    if (i // batch_size) % display_step == 0:
        # Calculate batch accuracy
        acc =, feed_dict={hyp: hyps, evi: evis, y: ys})
        # Calculate batch loss
        tmp_loss =, feed_dict={hyp: hyps, evi: evis, y: ys})
        # Display results
        print("Iter " + str(i // batch_size) + ", Minibatch Loss= " +
              "{:.6f}".format(tmp_loss) + ", Training Accuracy= " +
              "{:.5f}".format(acc))
        # Log accuracy for TensorBoard as a manually built summary
        # (reconstructed; assumes the scalar was logged as simple_value)
        summary = tf.Summary(value=[tf.Summary.Value(tag="Accuracy",
                                                     simple_value=float(acc))])
        writer.add_summary(summary, i)
evidences = ["Janos and Jade both were at the scene of the car crash."]
hypotheses = ["Multiple people saw the accident."]
sentence1 = [fit_to_size(np.vstack(sentence2sequence(evidence)[0]),
(30, 50)) for evidence in evidences]
sentence2 = [fit_to_size(np.vstack(sentence2sequence(hypothesis)[0]),
(30,50)) for hypothesis in hypotheses]
prediction =, feed_dict={hyp: (sentence1 * N),
evi: (sentence2 * N),
y: [[0,0,0]]*N})
print(["Positive", "Neutral", "Negative"][np.argmax(prediction[0])]+
" entailment")
