PyTorch Issue
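Repro script: a bidirectional GRU encoder is trained for one step, its state dict is saved and reloaded, and the loss and encoder hidden states are compared before and after the round trip.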
import torch
from torch import nn
from torch.autograd import Variable as Var
from torch.optim import SGD
from torch.nn.utils.rnn import pad_packed_sequence as unpack
from torch.nn.utils.rnn import pack_padded_sequence as pack
import random

random.seed(12345)
torch.manual_seed(12345)
torch.cuda.manual_seed(12345)
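
# Not part of the original gist: cuDNN RNN kernels are a common source of
# run-to-run drift, so one hedged debugging step is to take cuDNN out of the
# picture (or, on later PyTorch releases, ask for deterministic kernels).
# import torch.backends.cudnn as cudnn
# cudnn.enabled = False        # fall back to the native (slower) RNN kernels
# cudnn.deterministic = True   # only available in newer PyTorch versions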

class TestModule(nn.Module):
    def __init__(self):
        input_size = 128
        super(TestModule, self).__init__()
        self.word_lut = nn.Embedding(128, 128, padding_idx=0)
        # note: with num_layers=1 the inter-layer dropout of nn.GRU is a no-op
        self.rnn = nn.GRU(input_size, 128,
                          num_layers=1,
                          dropout=0.5,
                          bidirectional=True)
        # commented-out workaround: stub out cuDNN weight flattening
        # self.rnn.flatten_parameters = lambda *args, **kwargs: None

    def forward(self, x):
        lengths = x[1].data.view(-1).tolist()
        data = x[0]
        emb = self.word_lut(data)
        emb = pack(emb, lengths)
        outputs, hidden_t = self.rnn(emb)
        if isinstance(x, tuple):  # was `isinstance(input, ...)`, which tested the builtin
            outputs = unpack(outputs)[0]
        return hidden_t, outputs

def save_model(model, file):
    model_state_dict = model.state_dict()
    checkpoint = {
        'model': model_state_dict,
    }
    torch.save(checkpoint, file)


def load_model(model, file):
    checkpoint = torch.load(file)
    model.load_state_dict(checkpoint['model'])
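
# Not in the original gist: a hedged variant of load_model. If the divergence
# comes from cuDNN weight flattening after load_state_dict, re-flattening the
# RNN parameters once the new weights are in place may restore agreement.
def load_model_flattened(model, file):
    checkpoint = torch.load(file)
    model.load_state_dict(checkpoint['model'])
    model.rnn.flatten_parameters()  # re-pack weights into cuDNN's flat buffer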

def save_model_2(model, file):
    # alternative HDF5 round trip, presumably to rule out torch.save itself
    import h5py
    model_state_dict = model.state_dict()
    h5f = h5py.File(file, 'w')  # was hard-coded to 'test.h5', ignoring `file`
    for name, p in model_state_dict.items():
        np_tensor = p.cpu().numpy()
        h5f.create_dataset(name, data=np_tensor, dtype='float32')
    h5f.close()


def load_model_2(model, file):
    import h5py
    h5f = h5py.File(file, 'r')
    checkpoint = {}
    for name in h5f.keys():
        checkpoint[name] = torch.Tensor(h5f[name][:])
    h5f.close()
    model.load_state_dict(checkpoint)

def main():
    model = TestModule().cuda()
    optim = SGD(model.parameters(), lr=0.01)

    batch_size = 64
    dict_size = 128
    min_sent_len = 60
    max_sent_len = 100

    # build a random batch of variable-length "sentences"
    data_buf = []
    lengths = []
    fake_label = []
    for i in range(batch_size):
        sent_len = random.randint(min_sent_len, max_sent_len)
        # indices must stay below dict_size: randint is inclusive on both ends,
        # so the original upper bound of dict_size could index out of the
        # embedding's range
        sent = [random.randint(1, dict_size - 1) for _ in range(sent_len)]
        data_buf.append(torch.LongTensor(sent))
        lengths.append(sent_len)
        fake_label.append(sent_len % 2)

    # sort by decreasing length, as required by pack_padded_sequence
    data_buf, fake_label, lengths = zip(*sorted(zip(data_buf, fake_label, lengths), key=lambda x: -x[-1]))
    sents = torch.LongTensor(batch_size, max_sent_len).zero_()
    for i in range(len(data_buf)):
        data_length = data_buf[i].size(0)
        sents[i].narrow(0, 0, data_length).copy_(data_buf[i])
    lengths = torch.LongTensor(lengths)
    dataset = (Var(sents.transpose(0, 1).contiguous().cuda()), Var(lengths.cuda()))

    # one training step
    hidden, _ = model(dataset)
    hidden = hidden.transpose(0, 1).contiguous().view(hidden.size(1), -1)
    classifier = nn.Sequential(
        nn.Linear(128 * 2, 2),
        nn.LogSoftmax(),
    )
    classifier.cuda()
    targets = Var(torch.LongTensor(fake_label).view(-1).cuda())
    pred = classifier(hidden)
    loss_fn = nn.NLLLoss(size_average=False)
    loss = loss_fn(pred, targets)
    print(loss.data[0])
    model.zero_grad()
    loss.backward()
    optim.step()

    # now save it
    save_model(model, 'check')
    # check the loss after the update
    hidden_1, _ = model(dataset)
    hidden_1 = hidden_1.transpose(0, 1).contiguous().view(hidden_1.size(1), -1)
    pred = classifier(hidden_1)
    loss_fn = nn.NLLLoss(size_average=False)
    loss = loss_fn(pred, targets)
    print(loss.data[0])
    # we check it again; this should match the previous print
    hidden_1, _ = model(dataset)
    hidden_1 = hidden_1.transpose(0, 1).contiguous().view(hidden_1.size(1), -1)
    pred = classifier(hidden_1)
    loss_fn = nn.NLLLoss(size_average=False)
    loss = loss_fn(pred, targets)
    print(loss.data[0])
    # now we load the model, and check
    load_model(model, 'check')
    hidden_2, _ = model(dataset)
    hidden_2 = hidden_2.transpose(0, 1).contiguous().view(hidden_2.size(1), -1)
    pred = classifier(hidden_2)
    loss_fn = nn.NLLLoss(size_average=False)
    loss = loss_fn(pred, targets)
    print(loss.data[0])
    # suppose we are testing after training:
    # we have a new model, we load the checkpoint into it, and test
    model_2 = TestModule().cuda()
    model_2.zero_grad()
    load_model(model_2, 'check')
    hidden_2, _ = model_2(dataset)
    hidden_2 = hidden_2.transpose(0, 1).contiguous().view(hidden_2.size(1), -1)
    pred = classifier(hidden_2)
    loss_fn = nn.NLLLoss(size_average=False)
    loss = loss_fn(pred, targets)
    print(loss.data[0])
    # check every parameter matrix element-wise; the two models should agree
    orig_data = [x for x in model.named_parameters()]
    now_data = [x for x in model_2.named_parameters()]
    for orig, now in zip(orig_data, now_data):
        assert orig[0] == now[0]
        orig = orig[1].data
        now = now[1].data
        if len(orig.shape) != 2:
            continue
        for i in range(orig.shape[0]):
            for j in range(orig.shape[1]):
                if orig[i][j] != now[i][j]:
                    print('parameter mismatch at [{0}][{1}]'.format(i, j))
    # count how many elements of the two hidden states differ
    count = 0
    diff_count = 0
    tensor_size = hidden_1.size()
    for i in range(tensor_size[0]):
        for j in range(tensor_size[1]):
            count += 1
            if hidden_1.data[i][j] != hidden_2.data[i][j]:
                # print('{0}\t{1}'.format(hidden_1.data[i][j], hidden_2.data[i][j]))
                diff_count += 1
    print('{0} / {1}'.format(diff_count, count))
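
    # Not in the original gist: an equivalent vectorized check over the same
    # hidden_1 / hidden_2 variables. Summing an element-wise inequality mask
    # gives the mismatch count without the Python double loop.
    # diff_count = (hidden_1.data != hidden_2.data).sum()
    # print('{0} / {1}'.format(diff_count, hidden_1.data.nelement()))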

if __name__ == "__main__":
    main()