Memory usage of LSTM
# This is taken from https://github.com/yunjey/pytorch-tutorial with just a few changes.
# Please see there for copyright and license information and use that copy.
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import gc

# Helper function to get the RSS size, see stat(5) under statm. This is in pages...
def memory_usage():
    return int(open('/proc/self/statm').read().split()[1])
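
# A small sketch, not part of the original gist: report the RSS in MiB
# instead of raw pages (assumes the standard-library resource module for
# the page size).
import resource
def memory_usage_mib():
    return memory_usage() * resource.getpagesize() / (1024.0 * 1024.0)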

# Hyper Parameters
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 100
learning_rate = 0.003

# MNIST Dataset
train_dataset = dsets.MNIST(root='../data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='../data/',
                           train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
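
# Note (added remark): each 28x28 MNIST image is fed to the LSTM as a
# sequence of 28 time steps (rows) with 28 pixel features each; the view()
# calls in the loops below reshape the (batch, 1, 28, 28) batches accordingly.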

# BiRNN Model (Many-to-One)
class BiRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size * 2, num_classes)  # *2 because the LSTM is bidirectional

    def forward(self, x):
        # Set initial hidden and cell states (num_layers * 2 for the two directions)
        h0 = Variable(torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size)).cuda()
        c0 = Variable(torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size)).cuda()
        # Forward propagate the LSTM
        out, _ = self.lstm(x, (h0, c0))
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out
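
# Added remark (a sketch, not the gist's method): with bidirectional=True the
# last dimension of `out` stacks [forward; backward] features, so at t = -1
# the backward half has seen only the final input row. Inside forward() one
# could instead combine the forward state at t = -1 with the backward state
# at t = 0 before the fc layer:
#
#     last = torch.cat([out[:, -1, :self.hidden_size],
#                       out[:, 0, self.hidden_size:]], 1)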

rnn = BiRNN(input_size, hidden_size, num_layers, num_classes)
rnn.cuda()

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.view(-1, sequence_length, input_size)).cuda()
        labels = Variable(labels).cuda()

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = rnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Collect cyclic garbage every step so freed graph objects are
        # reclaimed promptly while we watch the resident set size below.
        gc.collect()

        if (i + 1) % 600 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size, loss.data[0]))
            print("mem usage", memory_usage())

# Test the Model
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images.view(-1, sequence_length, input_size)).cuda()
    outputs = rnn(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted.cpu() == labels).sum()

print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))
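
# Memory note (added remark): with the old autograd API used here, wrapping
# the test inputs as Variable(..., volatile=True) would keep autograd from
# building a graph at inference time; newer PyTorch replaces this with the
# torch.no_grad() context manager.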