import dynet
import random
import matplotlib.pyplot as plt
import numpy as np

class Network:
    def __init__(self, vocab, properties):
        self.properties = properties
        self.vocab = vocab
        # first initialize a model: a parameter collection that holds the trainable parameters.
        self.model = dynet.Model()
        # assign the Adam algorithm for backpropagation updates.
        self.updater = dynet.AdamTrainer(self.model)
        # create embeddings for words and tag features.
        self.word_embedding = self.model.add_lookup_parameters((vocab.num_words(), properties.word_embed_dim))
        self.tag_embedding = self.model.add_lookup_parameters((vocab.num_tag_feats(), properties.pos_embed_dim))
        # assign the transfer function.
        self.transfer = dynet.rectify  # can be dynet.logistic or dynet.tanh as well.
        # define the input dimension for the embedding layer.
        # here we use the two words before and after the current word plus the current word
        # itself (five word embeddings), and the last two predicted tags (two tag embeddings).
        self.input_dim = 5 * properties.word_embed_dim + 2 * properties.pos_embed_dim
        # define the hidden layer.
        self.hidden_layer = self.model.add_parameters((properties.hidden_dim, self.input_dim))
        # define the hidden layer bias term and initialize it as constant 0.2.
        self.hidden_layer_bias = self.model.add_parameters(properties.hidden_dim, init=dynet.ConstInitializer(0.2))
        # define the output weight.
        self.output_layer = self.model.add_parameters((vocab.num_tags(), properties.hidden_dim))
        # define the output bias vector and initialize it as zero.
        self.output_bias = self.model.add_parameters(vocab.num_tags(), init=dynet.ConstInitializer(0))
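        # As a worked example (illustrative dimensions, not fixed by this code):
        # with word_embed_dim = 64 and pos_embed_dim = 32, the input dimension is
        # 5 * 64 + 2 * 32 = 384, so the hidden layer is a (hidden_dim, 384) matrix
        # and the output layer is a (num_tags, hidden_dim) matrix.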

    def forward(self, features):
        # extract word and tag ids.
        word_ids = [self.vocab.word2id(word_feat) for word_feat in features[0:5]]
        tag_ids = [self.vocab.feat_tag2id(tag_feat) for tag_feat in features[5:]]
        # extract word embeddings and tag embeddings from the features.
        word_embeds = [self.word_embedding[wid] for wid in word_ids]
        tag_embeds = [self.tag_embedding[tid] for tid in tag_ids]
        # concatenate all features (recall that '+' for lists appends the two lists).
        embedding_layer = dynet.concatenate(word_embeds + tag_embeds)
        # calculate the hidden layer.
        # .expr() converts a parameter to a matrix expression (a DyNet-specific syntax).
        hidden = self.transfer(self.hidden_layer.expr() * embedding_layer + self.hidden_layer_bias.expr())
        # calculate the output layer.
        output = self.output_layer.expr() * hidden + self.output_bias.expr()
        # return the output expression (unnormalized scores over all tags).
        return output
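
    # An example of what `forward` expects (mirroring how `decode` builds its
    # feature lists below): for the sentence "the dog barks", padded with two
    # start symbols, the features for the first word are the five-word window
    # followed by the two previously predicted tags:
    #   ['<s>', '<s>', 'the', 'dog', 'barks', '<s>', '<s>']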

    def train(self, train_file, epochs):
        # matplotlib config
        loss_values = []
        plt.ion()
        ax = plt.gca()
        ax.set_xlim([0, 10])
        ax.set_ylim([0, 3])
        plt.title("Loss over time")
        plt.xlabel("Minibatch")
        plt.ylabel("Loss")

        for i in range(epochs):
            print('started epoch', (i + 1))
            losses = []
            train_data = open(train_file, 'r').read().strip().split('\n')
            # shuffle the training data.
            random.shuffle(train_data)
            step = 0
            for line in train_data:
                fields = line.strip().split('\t')
                features, label = fields[:-1], fields[-1]
                gold_label = self.vocab.tag2id(label)
                result = self.forward(features)
                # get the loss with respect to the negative log softmax function and the gold label.
                loss = dynet.pickneglogsoftmax(result, gold_label)
                # append to the minibatch losses.
                losses.append(loss)
                step += 1
                if len(losses) >= self.properties.minibatch_size:
                    # now we have enough loss values to get the loss for the minibatch.
                    minibatch_loss = dynet.esum(losses) / len(losses)
                    # ask DyNet to run the forward computation for all minibatch items.
                    minibatch_loss.forward()
                    # get the float value of the loss for the current minibatch.
                    minibatch_loss_value = minibatch_loss.value()
                    # print info and plot.
                    loss_values.append(minibatch_loss_value)
                    if len(loss_values) % 10 == 0:
                        ax.set_xlim([0, len(loss_values) + 10])
                        ax.plot(loss_values)
                        plt.draw()
                        plt.pause(0.0001)
                    progress = round(100 * float(step) / len(train_data), 2)
                    print('current minibatch loss', minibatch_loss_value, 'progress:', progress, '%')
                    # ask DyNet to run backpropagation.
                    minibatch_loss.backward()
                    # ask DyNet to update the parameter values with respect to the current gradients.
                    self.updater.update()
                    # empty the loss list.
                    losses = []
                    # refresh DyNet's memory (the computation graph).
                    dynet.renew_cg()
            # there may be leftover losses from an incomplete final minibatch;
            # renew the computation graph so DyNet forgets them.
            dynet.renew_cg()

    def decode(self, words):
        # pad the sentence with two start symbols and two end symbols.
        words = ['<s>', '<s>'] + words + ['</s>', '</s>']
        tags = ['<s>', '<s>']
        for i in range(2, len(words) - 2):
            features = words[i - 2:i + 3] + tags[i - 2:i]
            # run the forward pass.
            output = self.forward(features)
            # get the numpy value of the output scores.
            scores = output.npvalue()
            # get the best tag.
            best_tag_id = np.argmax(scores)
            # assign the best tag.
            tags.append(self.vocab.tagid2tag_str(best_tag_id))
        # refresh DyNet's memory (the computation graph).
        dynet.renew_cg()
        # strip the two start symbols before returning.
        return tags[2:]
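
    # For example, after training, decoding might look like the following
    # (the predicted tags are hypothetical and depend on the trained model):
    #   network.decode(['the', 'dog', 'barks'])  # -> e.g. ['DT', 'NN', 'VBZ']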

    def load(self, filename):
        self.model.populate(filename)

    def save(self, filename):
        self.model.save(filename)
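
# A minimal end-to-end usage sketch. `Vocab` and `Properties` are assumptions
# here: helper classes (not defined in this file) that provide the word/tag id
# mappings and the hyper-parameters this network reads (word_embed_dim,
# pos_embed_dim, hidden_dim, minibatch_size); their constructor signatures
# below are hypothetical.
#
#   vocab = Vocab('train.tagged')
#   properties = Properties(word_embed_dim=64, pos_embed_dim=32,
#                           hidden_dim=200, minibatch_size=1000)
#   network = Network(vocab, properties)
#   network.train('train.tagged', epochs=5)
#   print(network.decode(['the', 'dog', 'barks']))
#   network.save('pos_tagger.model')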