Create and initialize LSTM model with PyTorch
# import PyTorch
import torch
import torch.nn as nn

# Create LSTM
class SimpleLSTM(nn.Module):
    '''
    Simple LSTM model to generate kernel titles.
    Arguments:
        - input_size - should be equal to the vocabulary size
        - output_size - should be equal to the vocabulary size
        - hidden_size - hyperparameter, the size of the LSTM's hidden state
    '''
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        # reshape the input to (seq_len=1, batch=1, input_size)
        output, hidden = self.lstm(input.view(1, 1, -1), hidden)
        output = self.linear(output[-1].view(1, -1))
        output = self.softmax(output)
        return output, hidden

    # the initialization of the hidden state and the cell state
    # device is 'cpu' or 'cuda'
    # I suggest using 'cuda' to speed up the computation
    def initHidden(self, device):
        return (torch.zeros(1, 1, self.hidden_size).to(device),
                torch.zeros(1, 1, self.hidden_size).to(device))

# Initialize LSTM
# number of hidden units
n_hidden = 128
# inputs and outputs of the LSTM are tensors representing words from the vocabulary,
# so vocab_size (defined elsewhere as the size of the vocabulary) is used for both
rnn = SimpleLSTM(vocab_size, n_hidden, vocab_size)
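
For reference, here is a minimal, self-contained sketch of a single forward step through the model. The toy vocab_size value, the one-hot encoding of a single word, and the arbitrary word index are illustrative assumptions for this sketch, not part of the original gist:

# Sketch: one forward step through SimpleLSTM (illustrative only)
vocab_size = 100  # hypothetical vocabulary size for this sketch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
rnn = SimpleLSTM(vocab_size, n_hidden, vocab_size).to(device)

# one-hot encode a single word (index 5 is arbitrary)
word = torch.zeros(vocab_size, device=device)
word[5] = 1.0

# run one step: the hidden state carries context between steps
hidden = rnn.initHidden(device)
output, hidden = rnn(word, hidden)

# output holds log-probabilities over the vocabulary for the next word
print(output.shape)  # torch.Size([1, vocab_size])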