# 0.1176554
# Version 1: a plain seq2seq encoder-decoder (no attention).
def homework(train_X, train_Y, tokenizer_en, tokenizer_ja):
    import numpy as np
    from keras.models import Model
    from keras.layers import Input, Embedding, Dense, LSTM

    emb_dim = 256
    hid_dim = 256
    # +1 because index 0 is reserved for padding and absent from word_index
    en_vocab_size = len(tokenizer_en.word_index) + 1
    ja_vocab_size = len(tokenizer_ja.word_index) + 1
    seqX_len = len(train_X[0])
    seqY_len = len(train_Y[0])

    # Encoder: embed the source tokens and keep only the final LSTM states [h, c]
    encoder_inputs = Input(shape=(seqX_len,))
    encoder_embedded = Embedding(en_vocab_size, emb_dim, mask_zero=True)(encoder_inputs)
    _, *encoder_states = LSTM(hid_dim, return_state=True)(encoder_embedded)
    # Decoder: trained with teacher forcing, initialized from the encoder states
    decoder_inputs = Input(shape=(seqY_len,))
    decoder_embedding = Embedding(ja_vocab_size, emb_dim)
    decoder_embedded = decoder_embedding(decoder_inputs)
    decoder_lstm = LSTM(hid_dim, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_embedded, initial_state=encoder_states)
    decoder_dense = Dense(ja_vocab_size, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)

    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')

    # The target is the decoder input shifted left by one step, padded with 0
    train_target = np.hstack((train_Y[:, 1:], np.zeros((len(train_Y), 1), dtype=np.int32)))
    model.fit([train_X, train_Y], np.expand_dims(train_target, -1),
              batch_size=128, epochs=10, verbose=2, validation_split=0.2)
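    # Worked example of the shift above for a toy row [bos, w1, w2, eos, 0]:
    #   decoder input (train_Y) : [bos, w1, w2, eos, 0]
    #   target (train_target)   : [w1,  w2, eos,  0,  0]
    # i.e. at every position the model learns to predict the next token.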
    # Inference models: reuse the trained layers, feeding one token at a time
    encoder_model = Model(encoder_inputs, encoder_states)
    decoder_states_inputs = [Input(shape=(hid_dim,)), Input(shape=(hid_dim,))]
    decoder_inputs = Input(shape=(1,))
    decoder_embedded = decoder_embedding(decoder_inputs)
    decoder_outputs, *decoder_states = decoder_lstm(decoder_embedded, initial_state=decoder_states_inputs)
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                          [decoder_outputs] + decoder_states)
    def decode_sequence(input_seq, bos_eos, max_output_length):
        states_value = encoder_model.predict(input_seq)
        target_seq = np.array(bos_eos[0])  # index corresponding to bos_eos[0] = "<s>"
        output_seq = bos_eos[0][:]         # copy so appending cannot mutate the caller's list
        while True:
            output_tokens, *states_value = decoder_model.predict([target_seq] + states_value)
            # Greedy decoding: pick the most probable token at this step
            sampled_token_index = [np.argmax(output_tokens[0, -1, :])]
            output_seq += sampled_token_index
            if sampled_token_index == bos_eos[1] or len(output_seq) > max_output_length:
                break
            target_seq = np.array(sampled_token_index)
        return output_seq

    return decode_sequence
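# Minimal usage sketch for version 1 (hypothetical: the course harness supplies
# the real train_X/train_Y and tokenizers; "<s>"/"</s>" are assumed to be the
# BOS/EOS entries in tokenizer_ja.word_index):
#
#   decode_sequence = homework(train_X, train_Y, tokenizer_en, tokenizer_ja)
#   bos_eos = [[tokenizer_ja.word_index['<s>']], [tokenizer_ja.word_index['</s>']]]
#   predicted_ids = decode_sequence(train_X[:1], bos_eos, max_output_length=100)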
# IndexError
# Version 2: seq2seq with dot-product (Luong-style) attention; this variant
# raised an IndexError when run.
def homework(train_X, train_Y, tokenizer_en, tokenizer_ja):
    import numpy as np
    from keras.models import Model
    from keras.layers import Input, Activation, Embedding, Dense, LSTM, concatenate, dot

    # Constants
    emb_dim = 256
    hid_dim = 256
    att_dim = 256
    en_vocab_size = len(tokenizer_en.word_index) + 1
    ja_vocab_size = len(tokenizer_ja.word_index) + 1
    seqX_len = len(train_X[0])
    seqY_len = len(train_Y[0])
    # ----- Model construction ----- #

    # Encoder: keep the full output sequence (for attention) plus the final states
    encoder_inputs = Input(shape=(seqX_len,))
    encoder_embedded = Embedding(en_vocab_size, emb_dim, mask_zero=True)(encoder_inputs)
    encoded_seq, *encoder_states = LSTM(hid_dim, return_sequences=True, return_state=True)(encoder_embedded)
    # Decoder
    decoder_inputs = Input(shape=(seqY_len,))
    decoder_embedding = Embedding(ja_vocab_size, emb_dim)
    decoder_embedded = decoder_embedding(decoder_inputs)
    decoder_lstm = LSTM(hid_dim, return_sequences=True, return_state=True)
    decoded_seq, _, _ = decoder_lstm(decoder_embedded, initial_state=encoder_states)
    # Attention (Luong-style "general" score: decoder states x W x encoder states)
    score_dense = Dense(hid_dim)
    score = score_dense(decoded_seq)                      # (batch, seqY_len, hid_dim)
    score = dot([score, encoded_seq], axes=(2, 2))        # (batch, seqY_len, seqX_len)
    attention = Activation('softmax')(score)              # weights over source positions
    context = dot([attention, encoded_seq], axes=(2, 1))  # weighted sum of encoder outputs
    concat = concatenate([context, decoded_seq], axis=2)
    attention_dense = Dense(att_dim, activation='tanh')
    attentional = attention_dense(concat)
    output_dense = Dense(ja_vocab_size, activation='softmax')
    outputs = output_dense(attentional)

    model = Model([encoder_inputs, decoder_inputs], outputs)
    model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')

    # As in version 1: target is the decoder input shifted left by one step
    train_target = np.hstack((train_Y[:, 1:], np.zeros((len(train_Y), 1), dtype=np.int32)))
    model.fit([train_X, train_Y], np.expand_dims(train_target, -1),
              batch_size=128, epochs=10, verbose=2, validation_split=0.2)
    # ----- Generation with the trained model ----- #
    encoder_model = Model(encoder_inputs, [encoded_seq] + encoder_states)
    decoder_states_inputs = [Input(shape=(hid_dim,)), Input(shape=(hid_dim,))]
    decoder_inputs = Input(shape=(1,))
    decoder_embedded = decoder_embedding(decoder_inputs)
    decoded_seq, *decoder_states = decoder_lstm(decoder_embedded, initial_state=decoder_states_inputs)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                          [decoded_seq] + decoder_states)
    # Attention applied one decoder step at a time, reusing the trained layers
    encoded_seq_in = Input(shape=(seqX_len, hid_dim))
    decoded_seq_in = Input(shape=(1, hid_dim))
    score = score_dense(decoded_seq_in)
    score = dot([score, encoded_seq_in], axes=(2, 2))
    attention = Activation('softmax')(score)
    context = dot([attention, encoded_seq_in], axes=(2, 1))
    concat = concatenate([context, decoded_seq_in], axis=2)
    attentional = attention_dense(concat)
    attention_outputs = output_dense(attentional)
    attention_model = Model([encoded_seq_in, decoded_seq_in], [attention_outputs, attention])
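    # Shape walk-through for a single decoding step (batch size 1):
    #   encoded_seq              : (1, seqX_len, hid_dim)  from encoder_model
    #   decoded_seq              : (1, 1, hid_dim)         from decoder_model
    #   score_dense(decoded_seq) : (1, 1, hid_dim)
    #   dot(..., axes=(2, 2))    : (1, 1, seqX_len)        one score per source token
    #   softmax -> attention     : (1, 1, seqX_len)
    #   dot(..., axes=(2, 1))    : (1, 1, hid_dim)         context vector
    #   concatenate              : (1, 1, 2 * hid_dim)
    #   attention_dense          : (1, 1, att_dim)
    #   output_dense             : (1, 1, ja_vocab_size)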
    # ----- decode_sequence ----- #
    def decode_sequence(input_seq, bos_eos, max_output_length=1000):
        encoded_seq, *states_value = encoder_model.predict(input_seq)
        target_seq = np.array(bos_eos[0])  # index corresponding to bos_eos[0] = "<s>"
        # Copy rather than alias: the in-place `+=` below would otherwise grow
        # bos_eos[0] itself across calls, a plausible source of the IndexError
        output_seq = bos_eos[0][:]
        attention_seq = np.empty((0, len(input_seq[0])))
        while True:
            decoded_seq, *states_value = decoder_model.predict([target_seq] + states_value)
            output_tokens, attention = attention_model.predict([encoded_seq, decoded_seq])
            sampled_token_index = [np.argmax(output_tokens[0, -1, :])]
            output_seq += sampled_token_index
            # Accumulated for later inspection of the alignment; not returned
            attention_seq = np.append(attention_seq, attention[0], axis=0)
            if sampled_token_index == bos_eos[1] or len(output_seq) > max_output_length:
                break
            target_seq = np.array(sampled_token_index)
        return output_seq

    return decode_sequence
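# If decode_sequence also returned attention_seq (it currently does not), the
# learned alignment could be inspected with matplotlib, e.g. (hypothetical):
#
#   import matplotlib.pyplot as plt
#   plt.imshow(attention_seq, cmap='gray')  # rows: output steps, cols: source tokens
#   plt.show()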