@t-vi
Created March 20, 2017 21:31
Memory usage of LSTM
# This is taken from https://github.com/yunjey/pytorch-tutorial with just a few changes.
# Please see there for copyright and license information and use that copy.
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import gc
# helper function to get rss size, see proc(5) under statm. This is in pages...
def memory_usage():
    return int(open('/proc/self/statm').read().split()[1])
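
# A hedged convenience sketch (not in the original gist): statm reports
# sizes in pages, so the RSS reading above can be converted to megabytes
# using the system page size. The name memory_usage_mb is illustrative.
import os
def memory_usage_mb():
    return memory_usage() * os.sysconf('SC_PAGE_SIZE') / (1024. * 1024.)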
# Hyper Parameters
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 100
learning_rate = 0.003
# MNIST Dataset
train_dataset = dsets.MNIST(root='../data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='../data/',
                           train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
# BiRNN Model (Many-to-One)
class BiRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size*2, num_classes)  # 2 for bidirection

    def forward(self, x):
        # Set initial states
        h0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)).cuda()  # 2 for bidirection
        c0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)).cuda()

        # Forward propagate RNN
        out, _ = self.lstm(x, (h0, c0))

        # Decode hidden state of last time step
        out = self.fc(out[:, -1, :])
        return out
rnn = BiRNN(input_size, hidden_size, num_layers, num_classes)
rnn.cuda()
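
# Hedged smoke test (not in the original gist): one MNIST batch of shape
# (batch, 1, 28, 28) is viewed as (batch, sequence_length, input_size),
# i.e. 28 time steps of 28 pixels each, and the model should return one
# score per class; _imgs and _lbls are illustrative names.
_imgs, _lbls = next(iter(train_loader))
_out = rnn(Variable(_imgs.view(-1, sequence_length, input_size)).cuda())
assert _out.size() == (batch_size, num_classes)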
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.view(-1, sequence_length, input_size)).cuda()
        labels = Variable(labels).cuda()

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = rnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        gc.collect()

        if (i+1) % 600 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))
            print("mem usage", memory_usage())
# Test the Model
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images.view(-1, sequence_length, input_size)).cuda()
    outputs = rnn(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted.cpu() == labels).sum()
print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))
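
# Hedged note (not in the original gist): in the pre-0.4 PyTorch this gist
# targets, evaluation inputs can be wrapped as Variable(t, volatile=True)
# so that no autograd graph is built during the test loop, e.g.:
# images = Variable(images.view(-1, sequence_length, input_size), volatile=True).cuda()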