
@williamFalcon
Last active June 4, 2018 17:29
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F
"""
Blog post:
Taming LSTMs: Variable-sized mini-batches and why PyTorch is good for your health:
https://medium.com/@_willfalcon/taming-lstms-variable-sized-mini-batches-and-why-pytorch-is-good-for-your-health-61d35642972e
"""
class BieberLSTM(nn.Module):
    def __init__(self, nb_layers, nb_lstm_units=100, embedding_dim=3, batch_size=3):
        super(BieberLSTM, self).__init__()
        self.vocab = {'<PAD>': 0, 'is': 1, 'it': 2, 'too': 3, 'late': 4, 'now': 5, 'say': 6, 'sorry': 7, 'ooh': 8,
                      'yeah': 9}
        self.tags = {'<PAD>': 0, 'VB': 1, 'PRP': 2, 'RB': 3, 'JJ': 4, 'NNP': 5}

        self.nb_lstm_layers = nb_layers
        self.nb_lstm_units = nb_lstm_units
        self.embedding_dim = embedding_dim
        self.batch_size = batch_size

        # don't count the padding tag for the classifier output
        self.nb_tags = len(self.tags) - 1

        # build actual NN
        self.__build_model()

    def __build_model(self):
        # build embedding layer first
        nb_vocab_words = len(self.vocab)

        # whenever the embedding sees the padding index it'll make the whole vector zeros
        padding_idx = self.vocab['<PAD>']
        self.word_embedding = nn.Embedding(
            num_embeddings=nb_vocab_words,
            embedding_dim=self.embedding_dim,
            padding_idx=padding_idx
        )

        # design LSTM
        self.lstm = nn.LSTM(
            input_size=self.embedding_dim,
            hidden_size=self.nb_lstm_units,
            num_layers=self.nb_lstm_layers,
            batch_first=True,
        )

        # output layer which projects back to tag space
        self.hidden_to_tag = nn.Linear(self.nb_lstm_units, self.nb_tags)

    def init_hidden(self):
        # the hidden state is a tuple (h_0, c_0), each of shape
        # (nb_lstm_layers, batch_size, nb_lstm_units)
        hidden_a = torch.randn(self.nb_lstm_layers, self.batch_size, self.nb_lstm_units)
        hidden_b = torch.randn(self.nb_lstm_layers, self.batch_size, self.nb_lstm_units)

        # move the hidden state to the GPU when one is available
        if torch.cuda.is_available():
            hidden_a = hidden_a.cuda()
            hidden_b = hidden_b.cuda()

        hidden_a = Variable(hidden_a)
        hidden_b = Variable(hidden_b)

        return (hidden_a, hidden_b)
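

# ---------------------------------------------------------------------------
# The gist stops at init_hidden(); below is a minimal, hedged sketch of how a
# forward pass over variable-length, padded batches might look for this model,
# following the pack_padded_sequence approach the linked blog post describes.
# It is NOT part of the original gist: forward_sketch(), the names X/X_lengths,
# and the toy batch are illustrative assumptions, not the author's code.
# ---------------------------------------------------------------------------
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


def forward_sketch(model, X, X_lengths):
    # X: (batch_size, max_seq_len) LongTensor of padded word indices
    # X_lengths: true (unpadded) lengths, sorted in decreasing order
    model.hidden = model.init_hidden()
    batch_size, seq_len = X.size()

    # 1. embed the padded indices -> (batch_size, seq_len, embedding_dim)
    X = model.word_embedding(X)

    # 2. pack the padded batch so the LSTM skips the <PAD> timesteps
    X = pack_padded_sequence(X, X_lengths, batch_first=True)

    # 3. run through the LSTM
    X, model.hidden = model.lstm(X, model.hidden)

    # 4. undo the packing -> back to (batch_size, seq_len, nb_lstm_units)
    X, _ = pad_packed_sequence(X, batch_first=True)

    # 5. project every timestep to tag space and log-softmax over the tags
    X = X.contiguous().view(-1, model.nb_lstm_units)
    X = model.hidden_to_tag(X)
    X = F.log_softmax(X, dim=1)
    X = X.view(batch_size, seq_len, model.nb_tags)
    return X


if __name__ == '__main__':
    # toy batch: 3 padded sentences from the vocab above (lengths 4, 2, 2)
    model = BieberLSTM(nb_layers=1)
    X = torch.LongTensor([[1, 2, 3, 4],   # "is it too late"
                          [6, 7, 0, 0],   # "say sorry" + padding
                          [8, 9, 0, 0]])  # "ooh yeah" + padding
    X_lengths = [4, 2, 2]
    y_hat = forward_sketch(model, X, X_lengths)
    print(y_hat.shape)  # expected: torch.Size([3, 4, 5])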