@ritchie46
Created June 20, 2018 09:28
minibatches in pytorch
"""
How to do minibatches for RNNs in pytorch
Assume we feed characters to the model and predict the language of the words.
"""
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import f1_score, precision_score


def prepare_batch(x, y):
    # determine the maximum word length in the batch and zero-pad the tensors
    n_max = max([a.shape[0] for a in x])
    pad = np.zeros((n_max, len(x), x[0].shape[2]))
    lengths = []
    for i in range(len(x)):
        lengths.append(x[i].shape[0])
        # shape = (n_characters, n_batch, n_features)
        pad[:x[i].shape[0], i:i + 1, :] = x[i]
    # the mini-batch needs to be in decreasing length order for pack_padded_sequence (pytorch)
    lengths = np.array(lengths)
    idx = np.argsort(lengths)[::-1]
    return pad[:, idx, :], lengths[idx], y[idx]
# the tensors in x_train have varying first dimensions because the words differ in length
# x_train = list[ array(n_characters, n_batches == 1, n_features), ..., array(n_characters, n_batches == 1, n_features) ]
# y_train = array([1, 12, 6, ... 3, 1]) (labels)
# pad, lengths, _ = prepare_batch(x_train[:10], y_train[:10])
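# A minimal illustration (toy data, not from the original gist): two "words" of
# length 3 and 5 with 4 features per character. The longer word comes first in
# the returned batch because pack_padded_sequence needs decreasing lengths.
x_toy = [np.random.rand(3, 1, 4), np.random.rand(5, 1, 4)]
y_toy = np.array([0, 1])
pad_toy, lengths_toy, y_toy_sorted = prepare_batch(x_toy, y_toy)
# pad_toy.shape == (5, 2, 4), lengths_toy == array([5, 3]), y_toy_sorted == array([1, 0])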
## Model
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.GRU(input_size, hidden_size, num_layers=1)
        self.linear1 = nn.Linear(hidden_size, output_size)
        self.logsoftmax = nn.LogSoftmax(dim=1)
        self.softmax = nn.Softmax(dim=1)
        self.inference = False

    def forward(self, x, lengths):
        hidden = self.init_hidden(x)
        # pack_padded_sequence so that the padded items in the sequence are not shown to the RNN
        x = torch.nn.utils.rnn.pack_padded_sequence(x, lengths)
        x, hidden = self.rnn(x, hidden)
        x, _ = torch.nn.utils.rnn.pad_packed_sequence(x)
        # select only the last valid output per word (many to one)
        lengths = lengths - 1
        x = x[lengths, np.arange(x.shape[1]), :]
        x = F.relu(x)
        x = self.linear1(x)
        if self.inference:
            output = self.softmax(x)
        else:
            output = self.logsoftmax(x)
        return output

    def init_hidden(self, x):
        if next(self.parameters()).is_cuda:
            return torch.zeros(1, x.size(1), self.hidden_size).float().cuda()
        return torch.zeros(1, x.size(1), self.hidden_size).float()
m = RNN(x_train[0].shape[2], 50, len(np.unique(y_train)))
m(torch.tensor(pad).float(), lengths).shape
>>> # some shape
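# (shape note, inferred from the code rather than from the gist output: with the
#  10-word batch above this is torch.Size([10, len(np.unique(y_train))]),
#  i.e. one row per word, because only the last time step is kept)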
criterion = nn.NLLLoss()
optim = torch.optim.Adam(m.parameters(), lr=0.001)
## Train
epochs = 25
batch_size = 50
print_iter = 100
if torch.cuda.is_available():
    m.cuda()
tboard = True
m.train(True)


def get_prediction(x, y):
    pad, lengths, y = prepare_batch(x, y)
    x = torch.tensor(pad).float()
    lengths = torch.tensor(lengths).long()
    y = torch.tensor(y, dtype=torch.long)
    if next(m.parameters()).is_cuda:
        x, lengths, y = x.cuda(), lengths.cuda(), y.cuda()
    return m(x, lengths), y


def test_eval():
    batch_pred, batch_y = get_prediction(x_test, y_test)
    batch_pred = batch_pred.cpu().data.numpy().argmax(1)
    batch_y = batch_y.cpu().data.numpy()
    return batch_pred, batch_y


# x_train and y_train are assumed to be numpy (object) arrays so they can be
# shuffled and sliced with fancy indexing
idx = np.arange(x_train.shape[0])
for epoch in range(epochs):
    np.random.shuffle(idx)
    x_train = x_train[idx]
    y_train = y_train[idx]
    current_batch = 0
    for iteration in range(y_train.shape[0] // batch_size):
        batch_x = x_train[current_batch: current_batch + batch_size]
        batch_y = y_train[current_batch: current_batch + batch_size]
        current_batch += batch_size
        optim.zero_grad()
        if len(batch_x) > 0:
            batch_pred, batch_y = get_prediction(batch_x, batch_y)
            loss = criterion(batch_pred, batch_y)
            loss.backward()
            optim.step()
        if iteration % print_iter == 0:
            with torch.no_grad():
                m.train(False)
                batch_pred, batch_y = test_eval()
                f1 = f1_score(batch_y, batch_pred, average='weighted')
                precision = precision_score(batch_y, batch_pred, average='weighted')
                print(loss.item(), '\titeration:', iteration, '\tepoch', epoch,
                      '\tf1', f1, '\tprecision', precision)
            m.train(True)
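# A possible inference sketch (an addition, not part of the original gist): the
# `inference` flag switches the head from log-softmax to softmax so the model
# returns class probabilities. Assumes x_test / y_test exist as in test_eval().
m.train(False)
m.inference = True
with torch.no_grad():
    probs, _ = get_prediction(x_test[:5], y_test[:5])
print(probs.cpu().numpy())  # each row sums to ~1 over the language classes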
@harkmug commented Jul 24, 2018
Hi,

A quick, naive question. In line 49, is the initial hidden state strictly needed (I think in pytorch > 0.3 that defaults to zeros, as specified in init_hidden, line 65)?

I am trying to understand if I am correct in assuming that the GRU automatically passes the hidden states between the sequences within a single batch item and there is no across-batch-item passing of hidden states (as might be the case in long continuous texts that are split into batches).

Thanks for your time!
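For reference, a minimal check of the first point (an illustrative sketch, not from the gist author): in recent PyTorch, nn.GRU defaults the initial hidden state to zeros when it is omitted, so the explicit init_hidden call gives the same outputs as passing nothing.

import torch
import torch.nn as nn

gru = nn.GRU(input_size=4, hidden_size=8, num_layers=1)
x = torch.randn(5, 3, 4)                        # (seq_len, batch, features)
out_default, _ = gru(x)                         # h0 omitted -> zeros
out_zeros, _ = gru(x, torch.zeros(1, 3, 8))     # explicit zero h0
print(torch.allclose(out_default, out_zeros))   # True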
