@rasoolims
Created November 16, 2017 03:23
import dynet
import random
import matplotlib.pyplot as plt
import numpy as np

class Network:
    def __init__(self, vocab, properties):
        self.properties = properties
        self.vocab = vocab

        # first initialize the model, the container that holds all trainable parameters.
        self.model = dynet.Model()

        # assign the algorithm for backpropagation updates.
        self.updater = dynet.AdamTrainer(self.model)

        # create embeddings for words and tag features.
        self.word_embedding = self.model.add_lookup_parameters((vocab.num_words(), properties.word_embed_dim))
        self.tag_embedding = self.model.add_lookup_parameters((vocab.num_tag_feats(), properties.pos_embed_dim))

        # assign the transfer (activation) function
        self.transfer = dynet.rectify  # can be dynet.logistic or dynet.tanh as well.

        # define the input dimension for the embedding layer.
        # here we assume we see two words before and two words after the current word (five word embeddings in total)
        # and the last two predicted tags (two tag embeddings).
        self.input_dim = 5 * properties.word_embed_dim + 2 * properties.pos_embed_dim

        # define the hidden layer.
        self.hidden_layer = self.model.add_parameters((properties.hidden_dim, self.input_dim))

        # define the hidden layer bias term and initialize it as constant 0.2.
        self.hidden_layer_bias = self.model.add_parameters(properties.hidden_dim, init=dynet.ConstInitializer(0.2))

        # define the output weight matrix.
        self.output_layer = self.model.add_parameters((vocab.num_tags(), properties.hidden_dim))

        # define the output bias vector and initialize it as zero.
        self.output_bias = self.model.add_parameters(vocab.num_tags(), init=dynet.ConstInitializer(0))
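    # A hedged worked example (the dimensions here are assumptions, not values from the gist):
    # with word_embed_dim = 64 and pos_embed_dim = 32, the concatenated input is
    # 5 * 64 + 2 * 32 = 384 units wide, so hidden_layer has shape (hidden_dim, 384)
    # and output_layer has shape (num_tags, hidden_dim).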
    def forward(self, features):
        # extract word and tag ids
        word_ids = [self.vocab.word2id(word_feat) for word_feat in features[0:5]]
        tag_ids = [self.vocab.feat_tag2id(tag_feat) for tag_feat in features[5:]]

        # extract word embeddings and tag embeddings from features
        word_embeds = [self.word_embedding[wid] for wid in word_ids]
        tag_embeds = [self.tag_embedding[tid] for tid in tag_ids]

        # concatenate all features (recall that '+' for lists appends the two lists)
        embedding_layer = dynet.concatenate(word_embeds + tag_embeds)

        # calculate the hidden layer
        # .expr() converts a parameter to a matrix expression (this is DyNet-specific syntax).
        hidden = self.transfer(self.hidden_layer.expr() * embedding_layer + self.hidden_layer_bias.expr())

        # calculate the output layer
        output = self.output_layer.expr() * hidden + self.output_bias.expr()

        # return the output expression (one unnormalized score per tag)
        return output
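    # A hedged illustration of the feature layout forward() expects (the sentence and
    # tags are made up, not from the gist): to tag "dog" in "the dog barked", decode()
    # below would build something like
    #   ['<s>', 'the', 'dog', 'barked', '</s>', '<s>', 'DT']
    # i.e. a five-word window followed by the two previously predicted tags, matching
    # the features[0:5] / features[5:] split above.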
    def train(self, train_file, epochs):
        # matplotlib config
        loss_values = []
        plt.ion()
        ax = plt.gca()
        ax.set_xlim([0, 10])
        ax.set_ylim([0, 3])
        plt.title("Loss over time")
        plt.xlabel("Minibatch")
        plt.ylabel("Loss")

        for i in range(epochs):
            print 'started epoch', (i + 1)
            losses = []
            train_data = open(train_file, 'r').read().strip().split('\n')

            # shuffle the training data.
            random.shuffle(train_data)

            step = 0
            for line in train_data:
                fields = line.strip().split('\t')
                features, label = fields[:-1], fields[-1]
                gold_label = self.vocab.tag2id(label)
                result = self.forward(features)

                # get the loss with respect to the negative log softmax function and the gold label.
                loss = dynet.pickneglogsoftmax(result, gold_label)

                # append to the minibatch losses
                losses.append(loss)
                step += 1

                if len(losses) >= self.properties.minibatch_size:
                    # now we have enough loss values to compute the loss for the minibatch
                    minibatch_loss = dynet.esum(losses) / len(losses)

                    # ask DyNet to run the forward computation for all minibatch items
                    minibatch_loss.forward()

                    # get the float value of the loss for the current minibatch
                    minibatch_loss_value = minibatch_loss.value()

                    # print info and plot
                    loss_values.append(minibatch_loss_value)
                    if len(loss_values) % 10 == 0:
                        ax.set_xlim([0, len(loss_values) + 10])
                        ax.plot(loss_values)
                        plt.draw()
                        plt.pause(0.0001)
                    progress = round(100 * float(step) / len(train_data), 2)
                    print 'current minibatch loss', minibatch_loss_value, 'progress:', progress, '%'

                    # ask DyNet to run backpropagation
                    minibatch_loss.backward()

                    # ask DyNet to update parameter values with respect to the current gradients
                    self.updater.update()

                    # empty the loss list
                    losses = []

                    # refresh DyNet's memory (computation graph)
                    dynet.renew_cg()

            # there may still be some minibatch items in memory, fewer than the minibatch size,
            # so we ask DyNet to forget them
            dynet.renew_cg()
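    # A hedged note on the expected input format (inferred from how the fields are split
    # above, not documented in the gist): each line of the training file is tab-separated,
    # feature columns first and the gold tag last, e.g.
    #   <s>\tthe\tdog\tbarked\t</s>\t<s>\tDT\tNN
    # where the first five columns are the word window, the next two are the previous
    # predicted tags, and the final column is the gold tag for the middle word.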
    def decode(self, words):
        # pad with two start symbols at the beginning and two end symbols at the end
        words = ['<s>', '<s>'] + words + ['</s>', '</s>']
        tags = ['<s>', '<s>']

        for i in range(2, len(words) - 2):
            features = words[i - 2:i + 3] + tags[i - 2:i]

            # run the forward computation
            output = self.forward(features)

            # get the numpy value of the output
            scores = output.npvalue()

            # get the best tag
            best_tag_id = np.argmax(scores)

            # assign the best tag
            tags.append(self.vocab.tagid2tag_str(best_tag_id))

        # refresh DyNet's memory (computation graph)
        dynet.renew_cg()
        return tags[2:]
    def load(self, filename):
        self.model.populate(filename)

    def save(self, filename):
        self.model.save(filename)
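
# A minimal usage sketch, not part of the original gist: the Network class assumes
# `vocab` and `properties` objects that the gist does not define, so the Properties
# and Vocab classes below are hypothetical stand-ins that only illustrate the
# interface Network relies on (word2id, feat_tag2id, tag2id, tagid2tag_str,
# num_words, num_tag_feats, num_tags, the *_dim fields and minibatch_size).

class Properties:
    def __init__(self):
        self.word_embed_dim = 64
        self.pos_embed_dim = 32
        self.hidden_dim = 200
        self.minibatch_size = 1000


class Vocab:
    def __init__(self, words, tags):
        # reserve id 0 for unknown words (including the <s> and </s> padding symbols)
        self.word_ids = {w: i + 1 for i, w in enumerate(words)}
        self.tag_list = list(tags)

    def word2id(self, word):
        return self.word_ids.get(word, 0)

    def feat_tag2id(self, tag):
        return self.tag_list.index(tag)

    def tag2id(self, tag):
        return self.tag_list.index(tag)

    def tagid2tag_str(self, tid):
        return self.tag_list[tid]

    def num_words(self):
        return len(self.word_ids) + 1

    def num_tag_feats(self):
        return len(self.tag_list)

    def num_tags(self):
        return len(self.tag_list)


if __name__ == '__main__':
    vocab = Vocab(['the', 'dog', 'barked'], ['<s>', 'DT', 'NN', 'VBD'])
    network = Network(vocab, Properties())
    # 'train.txt' would be a hypothetical tab-separated file in the format described above:
    # network.train('train.txt', epochs=3)
    print network.decode(['the', 'dog', 'barked'])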