# Gist by @magic282 (last active Jun 24, 2020).
# PyTorch issue reproduction script: demonstrates that a GRU with dropout
# produces different losses across forward passes even after save/load,
# because the model stays in training mode.
import torch
from torch import nn
from torch.autograd import Variable as Var
from torch.optim import SGD
from torch.nn.utils.rnn import pad_packed_sequence as unpack
from torch.nn.utils.rnn import pack_padded_sequence as pack
import random
# Fix all RNG seeds (Python, CPU, current CUDA device) so runs are
# comparable; NOTE(review): cuDNN RNN dropout may still be a source of
# nondeterminism, which appears to be what this script demonstrates.
random.seed(12345)
torch.manual_seed(12345)
torch.cuda.manual_seed(12345)
class TestModule(nn.Module):
    """Embedding + single-layer bidirectional GRU encoder.

    forward() takes a tuple ``(data, lengths)`` where ``data`` is a
    (seq_len, batch) LongTensor of token ids and ``lengths`` holds the
    true length of each sequence, sorted in decreasing order (required
    by ``pack_padded_sequence``).  Returns ``(hidden_t, outputs)``.
    """

    def __init__(self):
        super(TestModule, self).__init__()
        input_size = 128
        self.word_lut = nn.Embedding(128, 128, padding_idx=0)
        # NOTE(review): dropout on a 1-layer RNN has no inter-layer effect
        # (PyTorch warns about this), but it keeps the cuDNN dropout state
        # active, which is relevant to the nondeterminism being reproduced.
        self.rnn = nn.GRU(input_size, 128,
                          num_layers=1,
                          dropout=0.5,
                          bidirectional=True)
        # self.rnn.flatten_parameters = lambda *args, **kwargs: None

    def forward(self, x):
        lengths = x[1].data.view(-1).tolist()
        data = x[0]
        emb = self.word_lut(data)
        emb = pack(emb, lengths)
        outputs, hidden_t = self.rnn(emb)
        # BUG FIX: the original tested `isinstance(input, tuple)`, where
        # `input` is the Python builtin function, so the condition was
        # always False and `outputs` was returned still packed.  Test the
        # actual argument instead so callers get a padded tensor back.
        if isinstance(x, tuple):
            outputs = unpack(outputs)[0]
        return hidden_t, outputs
def save_model(model, file):
    """Serialize *model*'s parameters to *file* as ``{'model': state_dict}``."""
    torch.save({'model': model.state_dict()}, file)
def load_model(model, file):
    """Restore into *model* (in place) the parameters written by save_model."""
    state = torch.load(file)['model']
    model.load_state_dict(state)
def save_model_2(model, file):
    """Write each parameter tensor of *model* into an HDF5 file at *file*.

    Dataset names are the state_dict keys; values are float32 arrays.

    BUG FIX: the original ignored the *file* argument and always wrote to
    the hard-coded path 'test.h5'.  Also uses a context manager so the
    HDF5 handle is closed even if a write raises.
    """
    import h5py
    model_state_dict = model.state_dict()
    with h5py.File(file, 'w') as h5f:
        for name, p in model_state_dict.items():
            # copy to host memory before handing the buffer to h5py
            h5f.create_dataset(name, data=p.cpu().numpy(), dtype='float32')
def load_model_2(model, file):
    """Load parameters written by save_model_2 from the HDF5 file *file*
    into *model* (in place).

    BUG FIX: the original ignored the *file* argument and always read the
    hard-coded path 'test.h5', and left a stray debug print in place.
    Uses a context manager so the HDF5 handle is always closed.
    """
    import h5py
    checkpoint = {}
    with h5py.File(file, 'r') as h5f:
        for name in h5f.keys():
            # [:] materializes the dataset as a numpy array
            checkpoint[name] = torch.Tensor(h5f[name][:])
    model.load_state_dict(checkpoint)
def _encode(model, classifier, dataset):
    """One forward pass: returns (log-prob predictions, flattened hidden state)."""
    hidden, _ = model(dataset)
    # (num_dirs, batch, h) -> (batch, num_dirs * h)
    hidden = hidden.transpose(0, 1).contiguous().view(hidden.size(1), -1)
    return classifier(hidden), hidden


def main():
    """Train one step, save/reload the model, and compare losses and
    hidden states across forward passes to expose nondeterminism."""
    model = TestModule().cuda()
    optim = SGD(model.parameters(), lr=0.01)
    batch_size = 64
    dict_size = 128
    min_sent_len = 60
    max_sent_len = 100

    # Build a random batch of variable-length sentences with a fake
    # binary label (parity of the length).
    data_buf = []
    lengths = []
    fake_label = []
    for _ in range(batch_size):
        sent_len = random.randint(min_sent_len, max_sent_len)
        sent = [random.randint(1, dict_size) for _ in range(sent_len)]
        data_buf.append(torch.LongTensor(sent))
        lengths.append(sent_len)
        fake_label.append(sent_len % 2)

    # Sort by decreasing length, as required by pack_padded_sequence.
    data_buf, fake_label, lengths = zip(
        *sorted(zip(data_buf, fake_label, lengths), key=lambda x: -x[-1]))

    # Zero-pad into a (batch, max_len) matrix; padding_idx is 0.
    sents = torch.LongTensor(batch_size, max_sent_len).zero_()
    for i in range(len(data_buf)):
        data_length = data_buf[i].size(0)
        sents[i].narrow(0, 0, data_length).copy_(data_buf[i])
    lengths = torch.LongTensor(lengths)
    # Model expects time-major input: (seq_len, batch).
    dataset = (Var(sents.transpose(0, 1).contiguous().cuda()), Var(lengths.cuda()))

    classifier = nn.Sequential(
        nn.Linear(128 * 2, 2),
        nn.LogSoftmax(),
    )
    classifier.cuda()
    targets = Var(torch.LongTensor(fake_label).view(-1).cuda())
    # Hoisted: the original rebuilt an identical NLLLoss before every pass.
    loss_fn = nn.NLLLoss(size_average=False)

    # One training step.
    pred, hidden = _encode(model, classifier, dataset)
    loss = loss_fn(pred, targets)
    print(loss.data[0])
    model.zero_grad()
    loss.backward()
    optim.step()

    # Save, then re-evaluate the loss twice on the same data; the values
    # can differ because the model stays in training mode (RNN dropout).
    save_model(model, 'check')
    for _ in range(2):
        pred, hidden_1 = _encode(model, classifier, dataset)
        print(loss_fn(pred, targets).data[0])

    # Reload into the same model and check again.
    load_model(model, 'check')
    pred, hidden_2 = _encode(model, classifier, dataset)
    print(loss_fn(pred, targets).data[0])

    # Simulate test-after-training: a fresh model loading the checkpoint.
    model_2 = TestModule().cuda()
    model_2.zero_grad()
    load_model(model_2, 'check')
    pred, hidden_2 = _encode(model_2, classifier, dataset)
    print(loss_fn(pred, targets).data[0])

    # Element-wise comparison of every 2-D parameter matrix; prints 'f'
    # for each mismatching entry.
    for (orig_name, orig_p), (now_name, now_p) in zip(
            model.named_parameters(), model_2.named_parameters()):
        assert orig_name == now_name
        orig = orig_p.data
        now = now_p.data
        if len(orig.shape) != 2:
            continue
        for i in range(orig.shape[0]):
            for j in range(orig.shape[1]):
                if orig[i][j] != now[i][j]:
                    print('f')

    # Count element-wise differences between the two hidden states.
    count = 0
    diff_count = 0
    rows, cols = hidden_1.size()
    for i in range(rows):
        for j in range(cols):
            count += 1
            if hidden_1.data[i][j] != hidden_2.data[i][j]:
                diff_count += 1
    print('{0} / {1}'.format(diff_count, count))
# Script entry point: run the reproduction only when executed directly.
if __name__ == "__main__":
    main()
# End of gist (GitHub page footer removed).