Skip to content

Instantly share code, notes, and snippets.

View Jirayut558's full-sized avatar

Jirayut Keawchuen Jirayut558

View GitHub Profile
def test_model():
# Builds the models with ende_embedding_model, loads saved weights into
# `infenc` and `infdec` (presumably the inference encoder/decoder - confirm),
# then enters an endless prediction loop.
# NOTE(review): `n_in` and `n_out` are not defined in this block; they must
# come from an enclosing scope.
# NOTE(review): this excerpt stops at `while True:`; the loop body is not
# visible here.
# define model
train, infenc, infdec = ende_embedding_model(n_in, n_out, 256)
# load weights
infenc.load_weights("model/model_enc.h5")
infdec.load_weights("model/model_dec.h5")
# start prediction
while True:
def train():
# Sets up the training inputs: the corpus path, empty containers, and the
# question/answer data returned by load_csv_data.
# NOTE(review): X1, X2, Y and vecsize are not used in the visible lines; the
# rest of the function is presumably cut off in this excerpt.
data = "corpus.th.csv"  # corpus path handed to load_csv_data below
dataX = []
dataY = []
X1 = []
X2 = []
Y = []
vecsize = encoded_length  # encoded_length is not defined in this block
dataX,dataY = load_csv_data(data)  # replaces the two empty lists created above
def embedding_model():
# Builds an embedding weight matrix from the word-vector model `wv_model`
# and starts constructing an Embedding layer sized to that matrix.
# NOTE(review): the excerpt ends inside the Embedding(...) call; the
# remaining arguments and the rest of the function are not visible here.
# NOTE(review): `wv.vocab` looks like the pre-4.0 gensim API - confirm the
# installed gensim version.
# define word embedding
# (word, vector) pairs for every entry in the vocabulary
vocab_list = [(k, wv_model.wv[k]) for k, v in wv_model.wv.vocab.items()]
# One row per vocabulary entry plus one extra row; row 0 is never written
# below, so it stays all zeros.
embeddings_matrix = np.zeros((len(wv_model.wv.vocab.items()) + 1, wv_model.vector_size))
for i in range(len(vocab_list)):
word = vocab_list[i][0]  # not used in the visible lines
embeddings_matrix[i + 1] = vocab_list[i][1]  # vector i is stored at row i + 1
# input_dim is the number of rows of the matrix built above
embedding_layer = Embedding(input_dim=len(embeddings_matrix),
output_dim=EMBEDDING_DIM,
def word_index(listword):
    """Map every token of every sentence through ``word2idx``.

    ``listword`` is iterated as a sequence of sentences, each of which is
    iterated token by token. The nested list of ``word2idx`` results is
    returned wrapped in a NumPy array.
    """
    indexed = [[word2idx(token) for token in sentence] for sentence in listword]
    return np.array(indexed)
def word2idx(word):
# NOTE(review): only the signature is visible in this excerpt; the function
# body is missing, so its behavior cannot be documented from here.
# Run each dataset through word_index, rebinding the names to its results.
X1 = word_index(X1)
X2 = word_index(X2)
Y = word_index(Y)
# Expand Y with to_categorical using max_word + 1 classes
# (presumably Keras' one-hot helper - confirm against the file's imports).
Y = to_categorical(Y, num_classes=max_word+1)
def padding_sequence(listsentence, maxseq):
    """Pad or truncate every sentence to exactly ``maxseq`` tokens.

    Sentences shorter than ``maxseq`` are right-padded with the ``"<EOS>"``
    marker; longer ones are cut to their first ``maxseq`` tokens. The input
    lists are not modified.

    Args:
        listsentence: Iterable of token lists.
        maxseq: Target length of every returned sentence.

    Returns:
        A new list containing one fixed-length token list per input sentence.
    """
    dataset = []
    for s in listsentence:
        missing = maxseq - len(s)
        if missing > 0:
            dataset.append(s + ["<EOS>"] * missing)
        else:
            # Covers both the exact-length and the too-long case.
            dataset.append(s[0:maxseq])
    # The visible original built `dataset` but never handed it back.
    return dataset
def preparingword(listword):
    """Apply ``wordcut`` to each item of ``listword``.

    Returns a list holding one ``wordcut`` result per input item, in the
    same order as the input.
    """
    return [wordcut(sentence) for sentence in listword]
def wordcut(sentence):
    """Tokenize ``sentence`` by delegating to ``deepcut.tokenize``."""
    tokens = deepcut.tokenize(sentence)
    return tokens
def load_data(datafile):
    """Load question/answer pairs from a plain-text corpus file.

    The whole file is read and lower-cased. Records are separated by a blank
    line; within a record the first line is the question and the second line
    is the answer. Any further lines of a record are ignored.

    Args:
        datafile: Path of the corpus file to read.

    Returns:
        A ``(dataX, dataY)`` tuple of parallel lists: the questions and the
        matching answers.
    """
    dataX = []
    dataY = []
    # Context manager so the file handle is closed even if reading fails.
    with open(datafile, "r") as corpus:
        data = corpus.read().lower()
    for record in data.split("\n\n"):
        lines = record.split("\n")
        # A trailing blank line (or a stray one-line record) yields fewer
        # than two lines; skip it instead of raising IndexError.
        if len(lines) < 2:
            continue
        dataX.append(lines[0])
        dataY.append(lines[1])
    return dataX, dataY
def load_data(datafile):
    """Load question/answer pairs from a plain-text corpus file.

    The whole file is read and lower-cased. Records are separated by a blank
    line; within a record the first line is the question and the second line
    is the answer. Any further lines of a record are ignored.

    Args:
        datafile: Path of the corpus file to read.

    Returns:
        A ``(dataX, dataY)`` tuple of parallel lists: the questions and the
        matching answers.
    """
    dataX = []
    dataY = []
    # Context manager so the file handle is closed even if reading fails.
    with open(datafile, "r") as corpus:
        data = corpus.read().lower()
    for record in data.split("\n\n"):
        lines = record.split("\n")
        # A trailing blank line (or a stray one-line record) yields fewer
        # than two lines; skip it instead of raising IndexError.
        if len(lines) < 2:
            continue
        dataX.append(lines[0])
        dataY.append(lines[1])
    return dataX, dataY
# -*- coding: utf-8 -*-
import logging
import os.path
import sys
import multiprocessing
from gensim.corpora import WikiCorpus
from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence
if __name__ == '__main__':