Bug Report: GPU memory grows with every training batch
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
from ilmt.utils.gpu_profile import counter


class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, num_layers):
        super(DecoderRNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.use_cuda = False  # flipped to True by cuda() below
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=2)

    def forward(self, seq, hidden, lm=False):
        output = self.embedding(seq)
        # output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        counter()  # dump live CUDA tensor counts (see gpu_profile below)
        input()    # pause so the profiler snapshot can be read
        output = self.out(output)
        output = self.softmax(output)
        output = output.squeeze(1)
        return output, hidden

    def cuda(self):
        self.use_cuda = True
        super().cuda()

    def initHidden(self, batch_size):
        result = Variable(torch.zeros(self.num_layers, batch_size, self.hidden_size))
        if self.use_cuda:
            return result.cuda()
        else:
            return result
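For context, a minimal sketch (not part of the gist) of one decoding step with this class; the sizes are assumptions read off the log shapes below (hidden size 500, 3 layers, target vocabulary 3258, batch 383). Note that forward() blocks at input() after printing each profiler snapshot.

decoder = DecoderRNN(hidden_size=500, output_size=3258, num_layers=3)
hidden = decoder.initHidden(batch_size=383)          # (3, 383, 500)
token = Variable(torch.LongTensor(383, 1).zero_())   # hypothetical <sos> id 0 per sample
log_probs, hidden = decoder(token, hidden)           # log_probs: (383, 3258)
_, next_token = log_probs.max(dim=1)                 # greedy pick of the next token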
import torch
import torch.nn as nn
from torch.autograd import Variable
from ilmt.utils.gpu_profile import counter


class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(EncoderRNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.use_cuda = False  # flipped to True by cuda() below
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True)

    def forward(self, seq, hidden):
        embedded = self.embedding(seq)
        output, hidden = self.gru(embedded, hidden)
        counter()  # dump live CUDA tensor counts (see gpu_profile below)
        input()    # pause so the profiler snapshot can be read
        return output, hidden

    def cuda(self):
        self.use_cuda = True
        super().cuda()

    def initHidden(self, batch_size):
        result = Variable(torch.zeros(self.num_layers, batch_size, self.hidden_size))
        if self.use_cuda:
            return result.cuda()
        else:
            return result
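And a sketch (again, not from the gist) of how the two presumably wire together in the trainer: the encoder's final hidden state seeds the decoder. Source vocabulary 3085 and sequence length 10 are assumptions read off the log shapes.

encoder = EncoderRNN(input_size=3085, hidden_size=500, num_layers=3)
decoder = DecoderRNN(hidden_size=500, output_size=3258, num_layers=3)

src = Variable(torch.LongTensor(383, 10).random_(3085))  # fake source batch
hidden = encoder.initHidden(batch_size=383)
enc_out, hidden = encoder(src, hidden)                   # hidden: (3, 383, 500)

sos = Variable(torch.LongTensor(383, 1).zero_())         # hypothetical <sos> id 0
log_probs, hidden = decoder(sos, hidden)                 # (383, 3258)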
import gc
from collections import Counter, defaultdict
from pprint import pprint

import torch


def get_tensors(gpu_only=True):
    """Yield every live tensor the garbage collector can see
    (CUDA tensors only, unless gpu_only=False)."""
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj):
                tensor = obj
            elif hasattr(obj, 'data') and torch.is_tensor(obj.data):
                tensor = obj.data  # unwrap Variables
            else:
                continue
            if not gpu_only or tensor.is_cuda:
                yield tensor
        except Exception:
            pass


progress = defaultdict(list)
iters = 0


def counter():
    """Record, per tensor size, how many live CUDA tensors of that size
    exist on this call; drop sizes not seen on every call so far, then
    print the history. A strictly growing count list means tensors of
    that size are being retained across iterations."""
    global progress, iters
    iters = iters + 1
    tensors = list(get_tensors())
    sizes = list(map(lambda x: x.size(), tensors))
    for key, item in Counter(sizes).items():
        progress[key].append(item)
    for key in list(progress):
        if len(progress[key]) < iters:
            del progress[key]
    pprint(progress)


def parameters(model):
    """Print the size of every parameter tensor in `model`."""
    for p in model.parameters():
        print(p.size())
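A toy demonstration (not part of the gist) of how counter() surfaces a leak: a CUDA tensor size that is retained on every iteration shows up with a strictly growing count list, while sizes whose tensors are freed between calls stay flat. Requires a CUDA device; the sizes here are arbitrary.

leaked = []
for step in range(3):
    leaked.append(torch.zeros(1000).cuda())  # retained forever: count grows 1, 2, 3
    scratch = torch.zeros(2000).cuda()       # rebound each pass: count stays at 1
    counter()

The training log below shows counter()'s first five snapshots from a real run; note the last two entries of each snapshot.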
[jerin@gnode33 machine-translation]$ python3 -m ilmt.scripts.run.trainer --save save/bouncer_vocab.pt --dataset bouncer --src en --tgt hi --epochs 10 --batch-size 640
epoch: 0%| | 0/10 [00:00<?, ?it/s]
/home/jerin/.local/lib/python3.5/site-packages/nltk/app/__init__.py:44: UserWarning: nltk.app.wordfreq not loaded (requires the matplotlib library).
  warnings.warn("nltk.app.wordfreq not loaded "
defaultdict(<class 'list'>,
            {torch.Size([3, 383, 500]): [4],
             torch.Size([383, 10]): [4],
             torch.Size([383, 10, 500]): [4],
             torch.Size([1500]): [24],
             torch.Size([1500, 500]): [24],
             torch.Size([3085]): [2],
             torch.Size([3085, 500]): [4],
             torch.Size([3258]): [2],
             torch.Size([3258, 500]): [4],
             torch.Size([4509000]): [1],
             torch.Size([153200000]): [1]})
defaultdict(<class 'list'>,
            {torch.Size([3, 383, 500]): [4, 2],
             torch.Size([383, 10, 500]): [4, 2],
             torch.Size([1500]): [24, 24],
             torch.Size([1500, 500]): [24, 24],
             torch.Size([3085]): [2, 2],
             torch.Size([3085, 500]): [4, 4],
             torch.Size([3258]): [2, 2],
             torch.Size([3258, 500]): [4, 4],
             torch.Size([4509000]): [1, 2],
             torch.Size([153200000]): [1, 1]})
defaultdict(<class 'list'>,
            {torch.Size([3, 383, 500]): [4, 2, 2],
             torch.Size([383, 10, 500]): [4, 2, 2],
             torch.Size([1500]): [24, 24, 24],
             torch.Size([1500, 500]): [24, 24, 24],
             torch.Size([3085]): [2, 2, 2],
             torch.Size([3085, 500]): [4, 4, 4],
             torch.Size([3258]): [2, 2, 2],
             torch.Size([3258, 500]): [4, 4, 4],
             torch.Size([4509000]): [1, 2, 3],
             torch.Size([153200000]): [1, 1, 1]})
defaultdict(<class 'list'>,
            {torch.Size([3, 383, 500]): [4, 2, 2, 2],
             torch.Size([383, 10, 500]): [4, 2, 2, 2],
             torch.Size([1500]): [24, 24, 24, 24],
             torch.Size([1500, 500]): [24, 24, 24, 24],
             torch.Size([3085]): [2, 2, 2, 2],
             torch.Size([3085, 500]): [4, 4, 4, 4],
             torch.Size([3258]): [2, 2, 2, 2],
             torch.Size([3258, 500]): [4, 4, 4, 4],
             torch.Size([4509000]): [1, 2, 3, 4],
             torch.Size([153200000]): [1, 1, 1, 1]})
batch: 7%|████ | 1/15 [00:06<01:32, 6.63s/it]
defaultdict(<class 'list'>,
            {torch.Size([3, 383, 500]): [4, 2, 2, 2, 2],
             torch.Size([383, 10, 500]): [4, 2, 2, 2, 2],
             torch.Size([1500]): [24, 24, 24, 24, 24],
             torch.Size([1500, 500]): [24, 24, 24, 24, 24],
             torch.Size([3085]): [2, 2, 2, 2, 2],
             torch.Size([3085, 500]): [4, 4, 4, 4, 4],
             torch.Size([3258]): [2, 2, 2, 2, 2],
             torch.Size([3258, 500]): [4, 4, 4, 4, 4],
             torch.Size([4509000]): [1, 2, 3, 4, 5],
             torch.Size([153200000]): [1, 1, 1, 1, 1]})
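Reading the final snapshot: every size is flat except torch.Size([4509000]), whose count grows by exactly one per call ([1, 2, 3, 4, 5]), i.e. roughly 18 MB of float32 data newly retained on each step. One common cause of exactly this signature in Variable-era PyTorch is accumulating the loss Variable across batches, which keeps each batch's autograd graph alive. This is a hedged guess, not a confirmed diagnosis of the ilmt trainer; `model`, `criterion`, and `batches` below are hypothetical placeholders.

total_loss = 0
for src, tgt in batches:                   # hypothetical training loop
    loss = criterion(model(src), tgt)
    total_loss += loss                     # leak: retains every batch's graph
    # fix: accumulate a plain Python number instead
    # total_loss += loss.data[0]           # PyTorch <= 0.3
    # total_loss += loss.item()            # PyTorch >= 0.4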