
@williamFalcon
Last active June 4, 2018 17:29
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F
"""
Blog post:
Taming LSTMs: Variable-sized mini-batches and why PyTorch is good for your health:
https://medium.com/@_willfalcon/taming-lstms-variable-sized-mini-batches-and-why-pytorch-is-good-for-your-health-61d35642972e
"""
class BieberLSTM(nn.Module):
    def __init__(self, nb_layers, nb_lstm_units=100, embedding_dim=3, batch_size=3):
        super(BieberLSTM, self).__init__()
        self.vocab = {'<PAD>': 0, 'is': 1, 'it': 2, 'too': 3, 'late': 4, 'now': 5, 'say': 6, 'sorry': 7, 'ooh': 8,
                      'yeah': 9}
        self.tags = {'<PAD>': 0, 'VB': 1, 'PRP': 2, 'RB': 3, 'JJ': 4, 'NNP': 5}

        self.nb_lstm_layers = nb_layers
        self.nb_lstm_units = nb_lstm_units
        self.embedding_dim = embedding_dim
        self.batch_size = batch_size

        # don't count the padding tag for the classifier output
        self.nb_tags = len(self.tags) - 1

        # build actual NN
        self.__build_model()

    def __build_model(self):
        # build embedding layer first
        nb_vocab_words = len(self.vocab)

        # whenever the embedding sees the padding index it'll make the whole vector zeros
        padding_idx = self.vocab['<PAD>']
        self.word_embedding = nn.Embedding(
            num_embeddings=nb_vocab_words,
            embedding_dim=self.embedding_dim,
            padding_idx=padding_idx
        )

        # design LSTM
        self.lstm = nn.LSTM(
            input_size=self.embedding_dim,
            hidden_size=self.nb_lstm_units,
            num_layers=self.nb_lstm_layers,
            batch_first=True,
        )

        # output layer which projects back to tag space
        self.hidden_to_tag = nn.Linear(self.nb_lstm_units, self.nb_tags)

    def init_hidden(self):
        # the hidden state is a tuple (h_0, c_0), each of shape
        # (nb_lstm_layers, batch_size, nb_lstm_units)
        hidden_a = torch.randn(self.nb_lstm_layers, self.batch_size, self.nb_lstm_units)
        hidden_b = torch.randn(self.nb_lstm_layers, self.batch_size, self.nb_lstm_units)

        # move the hidden state to the GPU when one is available
        if torch.cuda.is_available():
            hidden_a = hidden_a.cuda()
            hidden_b = hidden_b.cuda()

        hidden_a = Variable(hidden_a)
        hidden_b = Variable(hidden_b)

        return (hidden_a, hidden_b)
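

# ---------------------------------------------------------------------------
# The gist stops at init_hidden(); below is a minimal, hedged sketch of how a
# forward pass over variable-length, padded batches might look for this model,
# following the pack_padded_sequence approach the linked blog post describes.
# It is NOT part of the original gist: forward_sketch(), the names X/X_lengths,
# and the toy batch are illustrative assumptions, not the author's code.
# ---------------------------------------------------------------------------
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


def forward_sketch(model, X, X_lengths):
    # X: (batch_size, max_seq_len) LongTensor of padded word indices
    # X_lengths: true (unpadded) lengths, sorted in decreasing order
    model.hidden = model.init_hidden()
    batch_size, seq_len = X.size()

    # 1. embed the padded indices -> (batch_size, seq_len, embedding_dim)
    X = model.word_embedding(X)

    # 2. pack the padded batch so the LSTM skips the <PAD> timesteps
    X = pack_padded_sequence(X, X_lengths, batch_first=True)

    # 3. run through the LSTM
    X, model.hidden = model.lstm(X, model.hidden)

    # 4. undo the packing -> back to (batch_size, seq_len, nb_lstm_units)
    X, _ = pad_packed_sequence(X, batch_first=True)

    # 5. project every timestep to tag space and log-softmax over the tags
    X = X.contiguous().view(-1, model.nb_lstm_units)
    X = model.hidden_to_tag(X)
    X = F.log_softmax(X, dim=1)
    X = X.view(batch_size, seq_len, model.nb_tags)
    return X


if __name__ == '__main__':
    # toy batch: 3 padded sentences from the vocab above (lengths 4, 2, 2)
    model = BieberLSTM(nb_layers=1)
    X = torch.LongTensor([[1, 2, 3, 4],   # "is it too late"
                          [6, 7, 0, 0],   # "say sorry" + padding
                          [8, 9, 0, 0]])  # "ooh yeah" + padding
    X_lengths = [4, 2, 2]
    y_hat = forward_sketch(model, X, X_lengths)
    print(y_hat.shape)  # expected: torch.Size([3, 4, 5])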