cs295k hw4
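
A small feed-forward next-word language model in TensorFlow: it builds a vocabulary from train.txt, trains word embeddings plus one hidden layer to predict each word from the previous one, and reports per-word perplexity on test.txt.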
import tensorflow as tf
import argparse
import pickle

# hyperparameters
batch_size = 20
embed_size = 30
hidden_size = 100
# corpus is a flat list of word tokens
def makeWordIDs(corpus):
    wordIDs = {}
    index = 0
    for word in corpus:
        if word not in wordIDs:
            wordIDs[word] = index
            index += 1
    return wordIDs  # maps word -> int
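# example: makeWordIDs(["the", "cat", "sat", "the"]) == {"the": 0, "cat": 1, "sat": 2}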

def parseArgs():
    parser = argparse.ArgumentParser(description="cs295k hw4 language model")
    parser.add_argument("-s", "--save", help="save model to file")
    return parser.parse_args()

if __name__ == "__main__":
    args = parseArgs()

    print("Loading text corpora...")
    with open("train.txt", "r") as f:
        corpus = []
        for line in f:
            corpus.extend(line.split())
    with open("test.txt", "r") as f:
        test = []
        for line in f:
            test.extend(line.split())

    wordIDs = makeWordIDs(corpus)
    vocab_size = len(wordIDs)
    corpus = [wordIDs[w] for w in corpus]  # convert words to int IDs
    # assumes every test word also appears in train.txt; an unseen word would raise KeyError
    test = [wordIDs[w] for w in test]
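
    # Model: a feed-forward bigram language model. Each input word ID is mapped
    # to a learned embedding, passed through one ReLU hidden layer, and projected
    # to logits over the whole vocabulary to predict the next word.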

    # placeholders: a batch of current word IDs and the next word at each position
    inpt = tf.placeholder(tf.int32, [None])    # input words
    output = tf.placeholder(tf.int32, [None])  # target next words

    E = tf.Variable(tf.truncated_normal([vocab_size, embed_size], stddev=0.1))    # embedding matrix
    W1 = tf.Variable(tf.truncated_normal([embed_size, hidden_size], stddev=0.1))  # hidden-layer weights
    b1 = tf.Variable(tf.constant(0.01, shape=[hidden_size]))                      # hidden-layer bias
    W2 = tf.Variable(tf.truncated_normal([hidden_size, vocab_size], stddev=0.1))  # output-layer weights
    b2 = tf.Variable(tf.constant(0.01, shape=[vocab_size]))                       # output-layer bias

    Elookup = tf.nn.embedding_lookup(E, inpt)
    relu_layer = tf.nn.relu(tf.matmul(Elookup, W1) + b1)
    logits = tf.matmul(relu_layer, W2) + b2
    # per-word cross-entropy; perplexity is exp of its mean
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=output)
    loss = tf.reduce_mean(cross_entropy)
    train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
    perplexity = tf.exp(loss)

    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    # train the model: predict word i+1 from word i, one batch at a time
    total = int(len(corpus) / batch_size)
    n = 0
    for i in range(0, len(corpus) - batch_size - 1, batch_size):
        words = corpus[i:i + batch_size]
        nextwords = corpus[i + 1:i + batch_size + 1]
        train_step.run(feed_dict={inpt: words, output: nextwords}, session=sess)
        if n % 100 == 0:
            print("Batch #%d of %d (%.2f%%):" % (n, total, 100 * n / total))
            print("\tTrain perplexity: %.6f" % perplexity.eval(feed_dict={inpt: words, output: nextwords}, session=sess))
        n += 1

    # evaluate:
    print("Evaluating model...")
    print("Test perplexity: %.6f" % perplexity.eval(feed_dict={inpt: test[:-1], output: test[1:]}, session=sess))

    # support saving the trained model to file (optional)
    if args.save:
        # stash the tensors needed at inference time so a later script can
        # recover them from the imported graph
        tf.add_to_collection('logits', logits)
        tf.add_to_collection('inpt', inpt)
        saver = tf.train.Saver()
        print("Saving model to disk...")
        p = saver.save(sess, args.save)
        with open("%s.dict" % args.save, "wb") as f:
            pickle.dump(wordIDs, f)
        print("Wrote model to %s" % p)