Skip to content

Instantly share code, notes, and snippets.

@Jirayut558
Created December 28, 2018 06:46
Show Gist options
  • Save Jirayut558/b8b6e29262270a3083a68f5bff297c6b to your computer and use it in GitHub Desktop.
Save Jirayut558/b8b6e29262270a3083a68f5bff297c6b to your computer and use it in GitHub Desktop.
def word_index(listword):
    """Convert tokenised sentences into a NumPy array of vocabulary indices.

    Args:
        listword: Iterable of sentences, each an iterable of word tokens.

    Returns:
        ``np.ndarray`` holding, for every sentence, the ``word2idx`` result
        of each of its tokens, in the original order.

    Note:
        ``np.array`` only produces a regular 2-D array when every sentence
        has the same number of tokens; presumably the caller pads or
        truncates beforehand -- TODO confirm.
    """
    # A nested comprehension replaces the manual append loops; sentence and
    # token order are preserved.
    return np.array([[word2idx(w) for w in sentence] for sentence in listword])
def word2idx(word):
    """Return the vocabulary index of ``word`` in ``wv_model``.

    Lookup order:
      1. the word itself;
      2. the closest match returned by ``similar_word``;
      3. the ``"<NONE>"`` placeholder entry.

    Args:
        word: Token to look up.

    Returns:
        The ``index`` attribute of the vocabulary entry that was found.

    Raises:
        Whatever the vocabulary lookup raises when even ``"<NONE>"`` is
        missing (a ``KeyError`` if the vocabulary is a plain dict).
    """
    vocab = wv_model.wv.vocab

    # Only lookup failures trigger the fallback chain: KeyError for an
    # unknown word, TypeError for an unhashable or non-string token.
    # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
    try:
        return vocab[word].index
    except (KeyError, TypeError):
        pass

    try:
        return vocab[similar_word(word)].index
    except (KeyError, TypeError):
        return vocab["<NONE>"].index
def similar_word(word):
    """Return the entry of ``word_list`` that is closest to ``word``.

    Candidates come from ``difflib.get_close_matches`` called with its
    default ``n`` and ``cutoff``; it returns them ordered from most to least
    similar, so the first element is the best match.

    Args:
        word: Token for which a similar known word is wanted.

    Returns:
        The best match, or the placeholder ``"<NONE>"`` when no candidate
        is returned.
    """
    matches = difflib.get_close_matches(word, word_list)
    # An explicit emptiness check replaces a bare ``except`` around ``[0]``.
    return matches[0] if matches else "<NONE>"
# Replace the tokenised inputs and targets with their vocabulary-index arrays.
X1, X2, Y = map(word_index, (X1, X2, Y))
# Expand the target indices via to_categorical (presumably the Keras one-hot
# helper -- TODO confirm); the class count is max_word + 1.
Y = to_categorical(Y, num_classes=max_word + 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment