This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def test_model(): | |
# define model | |
train, infenc, infdec = ende_embedding_model(n_in, n_out, 256) | |
# load weights | |
infenc.load_weights("model/model_enc.h5") | |
infdec.load_weights("model/model_dec.h5") | |
# start prediction | |
while True: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def train(): | |
data = "corpus.th.csv" | |
dataX = [] | |
dataY = [] | |
X1 = [] | |
X2 = [] | |
Y = [] | |
vecsize = encoded_length | |
dataX,dataY = load_csv_data(data) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def embedding_model(): | |
# define word embedding | |
vocab_list = [(k, wv_model.wv[k]) for k, v in wv_model.wv.vocab.items()] | |
embeddings_matrix = np.zeros((len(wv_model.wv.vocab.items()) + 1, wv_model.vector_size)) | |
for i in range(len(vocab_list)): | |
word = vocab_list[i][0] | |
embeddings_matrix[i + 1] = vocab_list[i][1] | |
embedding_layer = Embedding(input_dim=len(embeddings_matrix), | |
output_dim=EMBEDDING_DIM, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def word_index(listword):
    """Convert sentences of tokens into a NumPy array of word indices.

    Args:
        listword: Iterable of sentences, each an iterable of tokens.

    Returns:
        The result of ``np.array`` applied to a list holding, for every
        sentence, the list of ``word2idx(token)`` values in token order.
    """
    # word2idx is defined later in this file; ``np`` is expected to be the
    # module-level NumPy import (not visible in this excerpt).
    return np.array(
        [[word2idx(token) for token in sentence] for sentence in listword]
    )
def word2idx(word): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Replace each token sequence with its index representation via word_index.
X1 = word_index(X1)
X2 = word_index(X2)
Y = word_index(Y)
# Expand the target indices into max_word + 1 classes.
# NOTE(review): to_categorical is presumably the Keras utility — confirm import.
Y = to_categorical(Y, num_classes=max_word + 1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def padding_sequence(listsentence,maxseq): | |
dataset = [] | |
for s in listsentence: | |
n = maxseq - len(s) | |
if n>0: | |
dataset.append(s+(["<EOS>"]*n)) | |
elif n<0: | |
dataset.append(s[0:maxseq]) | |
else: | |
dataset.append(s) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def preparingword(listword):
    """Run ``wordcut`` on every item of *listword*.

    Args:
        listword: Iterable of sentences to be segmented.

    Returns:
        A list containing one ``wordcut`` result per input item, in input
        order.
    """
    return [wordcut(sentence) for sentence in listword]
def wordcut(sentence):
    """Segment *sentence* by delegating to ``deepcut.tokenize``.

    Args:
        sentence: The text to segment.

    Returns:
        Whatever ``deepcut.tokenize(sentence)`` returns.
    """
    return deepcut.tokenize(sentence)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def load_data(datafile): | |
dataX = [] | |
dataY = [] | |
data = open(datafile, "r").read().lower() | |
for i in data.split("\n\n"): | |
a = i.split("\n") | |
question = a[0] | |
answer = a[1] | |
dataX.append(question) | |
dataY.append(answer) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def load_data(datafile): | |
dataX = [] | |
dataY = [] | |
data = open(datafile, "r").read().lower() | |
for i in data.split("\n\n"): | |
a = i.split("\n") | |
question = a[0] | |
answer = a[1] | |
dataX.append(question) | |
dataY.append(answer) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import logging | |
import os.path | |
import sys | |
import multiprocessing | |
from gensim.corpora import WikiCorpus | |
from gensim.models import Word2Vec | |
from gensim.models.word2vec import LineSentence | |
if __name__ == '__main__': |
NewerOlder