# cs295k hw4
import tensorflow as tf
import sys
import argparse
import pickle
batch_size = 20
embed_size = 30
hidden_size = 100
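
# Model overview: a feed-forward bigram language model. Each input word id is
# embedded (E), passed through one ReLU hidden layer (W1, b1), and projected to
# logits over the vocabulary (W2, b2) to predict the following word.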
# Build the vocabulary: map each unique token in the corpus to an integer id.
def makeWordIDs(corpus):
    wordIDs = {}
    index = 0
    for word in corpus:
        if word not in wordIDs:
            wordIDs[word] = index
            index += 1
    return wordIDs  # maps word -> int
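# For example, on a hypothetical corpus, makeWordIDs("the cat sat on the mat".split())
# returns {'the': 0, 'cat': 1, 'sat': 2, 'on': 3, 'mat': 4}.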
def parseArgs():
    parser = argparse.ArgumentParser(description="cs295k hw4 language model")
    parser.add_argument("-s", "--save", help="save model to file")
    return parser.parse_args()
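# Usage sketch (the script filename here is hypothetical):
#   python hw4.py                      # train on train.txt and report test perplexity
#   python hw4.py --save model.ckpt    # additionally save the model and word -> id dict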
if __name__ == "__main__":
    args = parseArgs()

    print("Loading text corpora...")
    with open("train.txt", "r") as f:
        corpus = []
        for line in f:
            corpus.extend(line.split())
    with open("test.txt", "r") as f:
        test = []
        for line in f:
            test.extend(line.split())

    wordIDs = makeWordIDs(corpus)
    vocab_size = len(wordIDs)
    corpus = [wordIDs[w] for w in corpus]  # convert to ints
    test = [wordIDs[w] for w in test]  # convert to ints (assumes every test token also appears in train.txt)
    # Set up placeholders: a batch of current-word ids and their next-word targets.
    inpt = tf.placeholder(tf.int32, [None])    # current words
    output = tf.placeholder(tf.int32, [None])  # next words

    # Model parameters: embedding matrix E, hidden layer (W1, b1), output layer (W2, b2).
    E = tf.Variable(tf.truncated_normal([vocab_size, embed_size], stddev=0.1))
    W1 = tf.Variable(tf.truncated_normal([embed_size, hidden_size], stddev=0.1))  # W_1
    b1 = tf.Variable(tf.constant(0.01, shape=[hidden_size]))  # first bias
    W2 = tf.Variable(tf.truncated_normal([hidden_size, vocab_size], stddev=0.1))
    b2 = tf.Variable(tf.constant(0.01, shape=[vocab_size]))

    # Forward pass: embedding lookup -> ReLU hidden layer -> logits over the vocabulary.
    Elookup = tf.nn.embedding_lookup(E, inpt)
    relu_layer = tf.nn.relu(tf.matmul(Elookup, W1) + b1)
    logits = tf.matmul(relu_layer, W2) + b2

    # Average per-token cross-entropy; perplexity is its exponential.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=output)
    mean_loss = tf.reduce_mean(cross_entropy)
    train_step = tf.train.AdamOptimizer(1e-4).minimize(mean_loss)
    perplexity = tf.exp(mean_loss)
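    # e.g. an average cross-entropy of ln(50) ~= 3.912 gives perplexity exp(3.912) = 50,
    # i.e. the model is on average as uncertain as a uniform choice among 50 words.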
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())
    # Train the model: slide over the corpus in steps of batch_size, feeding each
    # word as input and the word that follows it as the target.
    total = int(len(corpus)/batch_size)
    n = 0
    for i in range(0, len(corpus)-batch_size-1, batch_size):
        words = corpus[i:i+batch_size]
        nextwords = corpus[i+1:i+batch_size+1]
        train_step.run(feed_dict={inpt: words, output: nextwords}, session=sess)
        if not n % 100:
            print("Batch #%d of %d (%.2f%%):" % (n, total, 100*n/total))
            print("\tTrain perplexity: %.6f" % perplexity.eval(feed_dict={inpt: words, output: nextwords}, session=sess))
        n += 1
    # evaluate:
    print("Evaluating model...")
    print("Test perplexity: %.6f" % perplexity.eval(feed_dict={inpt: test[:-1], output: test[1:]}, session=sess))
    # Optionally save the trained model (and the word -> id dict) to disk; not required.
    if args.save:
        tf.add_to_collection('logits', logits)
        tf.add_to_collection('inpt', inpt)
        saver = tf.train.Saver()
        print("Saving model to disk...")
        p = saver.save(sess, args.save)
        with open("%s.dict" % args.save, "wb") as f:
            pickle.dump(wordIDs, f)
        print("Wrote model to %s" % p)