@nishnik · Created February 16, 2018
Load the preprocessed IMDB dataset and train a simple LSTM over it - PyTorch, Keras
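# Pipeline: load the IMDB reviews via keras.datasets.imdb (already tokenized to
# integer word ids), pad/truncate each review to 80 tokens, then train a small
# PyTorch LSTM classifier one review at a time with NLLLoss.
# Note: this gist uses the PyTorch 0.3-era API (autograd.Variable, loss.data[0]);
# on newer PyTorch releases that style is deprecated.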
from keras.preprocessing import sequence
from keras.datasets import imdb
import numpy as np
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
max_features = 20000
maxlen = 80 # cut texts after this number of words (among top max_features most common words)
batch_size = 32
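# batch_size is carried over from the original Keras example; the PyTorch
# training loop below actually feeds one review at a time.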
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
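# After padding, x_train and x_test are integer arrays of shape (25000, 80)
# and y_train / y_test are 0/1 sentiment labels.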
class LSTMClassifier(nn.Module):
    def __init__(self):
        super(LSTMClassifier, self).__init__()
        self.word_embeddings = nn.Embedding(20000, 128)  # (max_features, embedding_dim)
        self.lstm = nn.LSTM(128, 128)                    # (embedding_dim, hidden_dim)
        self.hidden2label = nn.Linear(128, 2)            # (hidden_dim, label_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        # the first is the hidden state h, the second is the cell state c
        return (autograd.Variable(torch.zeros(1, 1, 128)),   # (num_layers, batch, hidden_dim)
                autograd.Variable(torch.zeros(1, 1, 128)))

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        # reshape to (seq_len, batch=1, embedding_dim) as expected by nn.LSTM
        x = embeds.view(len(sentence), 1, -1)
        lstm_out, self.hidden = self.lstm(x, self.hidden)
        # classify from the output at the last time step
        y = self.hidden2label(lstm_out[-1])
        log_probs = F.log_softmax(y, dim=1)
        return log_probs
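# forward() takes a 1-D LongTensor of word ids (length 80 after padding) and
# returns log-probabilities of shape (1, 2) for the negative/positive classes.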
model = LSTMClassifier()
# sanity-check the forward pass on a single padded review
model(Variable(torch.from_numpy(x_train[0]).long()))
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
def evaluate(model, data_x, data_y, loss_function, name='test'):
    model.eval()
    avg_loss = 0.0
    truth_res = []
    pred_res = []
    for rep in range(len(data_x)):
        y = np.ndarray(1)
        y[0] = data_y[rep]
        truth_res.append(y[0])
        y = Variable(torch.from_numpy(y).long())
        # detach the hidden state from its history on the previous instance
        model.hidden = model.init_hidden()
        pred = model(Variable(torch.from_numpy(data_x[rep]).long()))
        pred_label = pred.data.max(1)[1].numpy()
        pred_res.append(pred_label)
        loss = loss_function(pred, y)
        avg_loss += loss.data[0]
    avg_loss /= len(data_x)
    acc = get_accuracy(truth_res, pred_res)
    print(name + ' avg_loss:%g acc:%g' % (avg_loss, acc))
    return acc
def get_accuracy(truth, pred):
    assert len(truth) == len(pred)
    right = 0
    for i in range(len(truth)):
        if truth[i] == pred[i]:
            right += 1.0
    return right / len(truth)
def train_epoch(model, train_data_x, train_data_y, loss_function, optimizer, i):
    model.train()
    avg_loss = 0.0
    count = 0
    truth_res = []
    pred_res = []
    prev_loss = 0.0
    for rep in range(len(train_data_x)):
        y = np.ndarray(1)
        y[0] = train_data_y[rep]
        truth_res.append(y[0])
        y = Variable(torch.from_numpy(y).long())
        # detach the hidden state from its history on the previous instance
        model.hidden = model.init_hidden()
        pred = model(Variable(torch.from_numpy(train_data_x[rep]).long()))
        pred_label = pred.data.max(1)[1].numpy()
        pred_res.append(pred_label)
        model.zero_grad()
        loss = loss_function(pred, y)
        avg_loss += loss.data[0]
        prev_loss += loss.data[0]
        count += 1
        if count % 200 == 0:
            # prev_loss is the summed loss over the last 200 reviews
            print('epoch: %d iterations: %d loss :%g' % (i, count, prev_loss))
            prev_loss = 0.0
        loss.backward()
        optimizer.step()
    avg_loss /= len(train_data_x)
    print('epoch: %d done! \n train avg_loss:%g , acc:%g' % (i, avg_loss, get_accuracy(truth_res, pred_res)))
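# Note: this trains one review at a time rather than in minibatches, so an epoch
# over all 25,000 training reviews is slow; treat it as a minimal example rather
# than an efficient training loop.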
EPOCH = 3
for i in range(EPOCH):
    print('epoch: %d start!' % i)
    train_epoch(model, x_train, y_train, loss_function, optimizer, i)
    # Optional dev-set early stopping, left commented out; it references names
    # (best_dev_acc, dev_data, word_to_ix, label_to_ix, no_up) that are not
    # defined in this script and a different evaluate() signature.
    # print('now best dev acc:', best_dev_acc)
    # dev_acc = evaluate(model, dev_data, loss_function, word_to_ix, label_to_ix, 'dev')
    # test_acc = evaluate(model, test_data, loss_function, word_to_ix, label_to_ix, 'test')
    # if dev_acc > best_dev_acc:
    #     best_dev_acc = dev_acc
    #     os.system('rm mr_best_model_acc_*.model')
    #     print('New Best Dev!!!')
    #     torch.save(model.state_dict(), 'best_models/mr_best_model_acc_' + str(int(test_acc*10000)) + '.model')
    #     no_up = 0
    # else:
    #     no_up += 1
    #     if no_up >= 10:
    #         exit()
evaluate(model, x_test, y_test, loss_function)