Function to convert the binary word vectors from http://bio.nlplab.org/#word-vectors
into a text file format so that they can be read easily.
def w2v_binary_to_text(file_path):
    """
    Convert a binary word2vec file into word2vec text format.

    The text file is written to the same directory as the input, using the
    input's file name with its final extension replaced by ``.txt``
    (e.g. ``vectors.200d.bin`` -> ``vectors.200d.txt``).

    Ref: http://bio.nlplab.org/#word-vectors

    Parameters
    ----------
    file_path : str
        Path to the binary word2vec file.

    Returns
    -------
    str
        Path of the text file that was written.
    """
    from gensim.models.keyedvectors import KeyedVectors

    file_dir, file_name = os.path.split(file_path)
    # splitext removes only the last extension; cutting at the first '.'
    # would truncate dotted names and make distinct inputs collide on the
    # same output file.
    file_name_txt = os.path.splitext(file_name)[0] + '.txt'
    output_path = os.path.join(file_dir, file_name_txt)

    model = KeyedVectors.load_word2vec_format(file_path, binary=True)
    model.save_word2vec_format(output_path, binary=False)
    return output_path
Load a pre-trained embedding text file. This returns a dictionary of embeddings where each key is a word and each value is that word's vector.
def load_pretrain_vector(file_name):
    """
    Load pre-trained word vectors from a word2vec text file.

    The first line of the file is treated as a header and skipped. Every
    following non-blank line is split on single spaces: the first field is
    the word and the remaining fields are its vector components.

    Parameters
    ----------
    file_name : str
        Path to the word2vec text file.

    Returns
    -------
    dict
        Mapping from word to a ``float32`` numpy array holding its vector.
    """
    embeddings_index = {}
    # The context manager guarantees the handle is closed, and iterating the
    # file object streams it line by line instead of loading it all at once.
    with open(file_name, 'r', encoding='utf-8') as f:
        next(f, None)  # skip the header line; tolerate an empty file
        for line in f:
            values = line.strip().split(' ')
            word = values[0]
            if not word:
                # Blank line: nothing to parse, and it must not create a
                # bogus '' entry with an empty vector.
                continue
            embeddings_index[word] = np.asarray(values[1:], dtype='float32')
    return embeddings_index