Basic mini encoder-decoder model that translates 'hello' to 'hola'
# coding: utf-8
"""
Seq2Seq (Encoder-Decoder) Model

This is a basic encoder-decoder model without an attention mechanism.
author: Keon Kim
"""
import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable  # PyTorch 0.x-era wrapper; still runs on current versions
from torch import optim

vocab_size = 256  # ASCII size
x_ = list(map(ord, "hello"))  # convert to a list of ASCII codes
y_ = list(map(ord, "hola"))   # convert to a list of ASCII codes
print("hello -> ", x_)
print("hola -> ", y_)

x = Variable(th.LongTensor(x_))
y = Variable(th.LongTensor(y_))
class Seq2Seq(nn.Module):
    def __init__(self, vocab_size, hidden_size):
        super(Seq2Seq, self).__init__()
        self.n_layers = 1
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.encoder = nn.LSTM(hidden_size, hidden_size)
        self.decoder = nn.LSTM(hidden_size, hidden_size)
        self.project = nn.Linear(hidden_size, vocab_size)

    def forward(self, inputs, targets):
        # Encoder inputs and states
        initial_state = self._init_state()
        embedding = self.embedding(inputs).unsqueeze(1)
        # embedding = [seq_len, batch_size, embedding_size]

        # Encoder
        encoder_output, encoder_state = self.encoder(embedding, initial_state)
        # encoder_output = [seq_len, batch_size, hidden_size]
        # encoder_state  = (h, c), each [n_layers, batch_size, hidden_size]

        # Decoder inputs and states
        decoder_state = encoder_state
        decoder_input = Variable(th.LongTensor([[0]]))  # index 0 (ASCII NUL) serves as the start-of-sequence token

        # Decoder: greedy decoding, feeding each prediction back in as the next input
        outputs = []
        for i in range(targets.size()[0]):
            decoder_input = self.embedding(decoder_input)
            decoder_output, decoder_state = self.decoder(decoder_input, decoder_state)

            # Project to the vocabulary size
            projection = self.project(decoder_output.view(1, -1))  # batch x vocab_size

            # Make prediction
            prediction = F.softmax(projection, dim=1)  # batch x vocab_size
            outputs.append(prediction)

            # Update the decoder input with the top prediction
            _, top_i = prediction.data.topk(1)  # 1 x 1
            decoder_input = Variable(top_i)

        outputs = th.stack(outputs).squeeze()
        return outputs

    def _init_state(self, batch_size=1):
        weight = next(self.parameters()).data
        return (
            Variable(weight.new(self.n_layers, batch_size, self.hidden_size).zero_()),
            Variable(weight.new(self.n_layers, batch_size, self.hidden_size).zero_())
        )
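
# --- Hedged sketch, not part of the original gist: greedy decoding when the
# --- target length is unknown. The helper name `translate` and the `max_len`
# --- argument are assumptions for illustration; it mirrors forward() above
# --- and reuses index 0 as the start-of-sequence token.
def translate(model, inputs, max_len=10):
    state = model._init_state()
    embedded = model.embedding(inputs).unsqueeze(1)
    _, state = model.encoder(embedded, state)        # encode the source sequence
    decoder_input = Variable(th.LongTensor([[0]]))   # start-of-sequence token
    decoded = []
    for _ in range(max_len):
        decoder_input = model.embedding(decoder_input)
        output, state = model.decoder(decoder_input, state)
        _, top_i = model.project(output.view(1, -1)).data.topk(1)  # most likely character
        decoded.append(top_i.item())
        decoder_input = Variable(top_i)
    return "".join(chr(c) for c in decoded)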
seq2seq = Seq2Seq(vocab_size, 16)
# Note: nn.CrossEntropyLoss applies log-softmax internally, so it usually expects
# raw logits; here it is fed softmax outputs, which still trains on this toy task
# but keeps the reported loss values high even once the decoding is correct.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(seq2seq.parameters(), lr=1e-3)

for i in range(1000):
    prediction = seq2seq(x, y)
    loss = criterion(prediction, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    loss_val = loss.item()
    if i % 100 == 0:
        print("%d loss: %s" % (i, loss_val))
        _, top1 = prediction.data.topk(1, 1)
        for c in top1.squeeze().numpy().tolist():
            print(chr(c), end=" ")
        print()
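
# --- Hedged usage sketch, not part of the original gist: it assumes the
# --- hypothetical translate() helper defined above, and the file name
# --- "seq2seq.pt" is chosen purely for illustration.
print(translate(seq2seq, x, max_len=len(y_)))  # should decode the learned mapping, e.g. "hola"

# Persist and restore the trained weights with the standard state_dict API.
th.save(seq2seq.state_dict(), "seq2seq.pt")
restored = Seq2Seq(vocab_size, 16)
restored.load_state_dict(th.load("seq2seq.pt"))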
keon commented Mar 28, 2017

output:

hello ->  [104, 101, 108, 108, 111]
hola  ->  [104, 111, 108, 97]
0 loss: 5.544624328613281
µ µ µ µ 
100 loss: 5.306090354919434
h h l l 
200 loss: 5.027240753173828
h l l a 
300 loss: 4.846072196960449
h o a a 
400 loss: 4.6610002517700195
h o l a 
500 loss: 4.608636856079102
h o l a 
600 loss: 4.58699893951416
h o l a 
700 loss: 4.574211120605469
h o l a 
800 loss: 4.567699909210205
h o l a 
900 loss: 4.56412410736084
h o l a 
