Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Convert a gensim word2vec model into files that can be visualized in TensorBoard. Details: https://eliyar.biz/using-pre-trained-gensim-word2vector-in-a-keras-model-and-visualizing/
from gensim.models import KeyedVectors
import io

# Load gensim word2vec
w2v_path = '<Gensim File Path>'
w2v = KeyedVectors.load_word2vec_format(w2v_path)

# gensim >= 4.0 renamed `index2word` to `index_to_key`; accept either so the
# script works on both old and new gensim releases.
words = getattr(w2v, 'index_to_key', None)
if words is None:
    words = w2v.index2word

# Two files are produced, with row i of each describing the same word:
#
# vecs.tsv -- one vector per line, components separated by `\t`:
#     0.1\t0.2\t0.5\t0.9
#     0.2\t0.1\t5.0\t0.2
#     0.4\t0.1\t7.0\t0.8
#
# meta.tsv -- one word per line:
#     token1
#     token2
#     token3
#
# The `with` statement closes both files even if writing fails part-way.
with io.open('vecs.tsv', 'w', encoding='utf-8') as out_v, \
        io.open('meta.tsv', 'w', encoding='utf-8') as out_m:
    # Write meta file and vector file in lockstep
    for word, vec in zip(words, w2v.vectors):
        out_m.write(word + "\n")
        out_v.write('\t'.join([str(x) for x in vec]) + "\n")
# Then we can use `http://projector.tensorflow.org/` to visualize those two files.
# 1. Open the Embedding Projector.
# 2. Click on "Load data".
# 3. Upload the two files we created above: vecs.tsv and meta.tsv.
# encoding: utf-8
"""
@author: BrikerMan
@contact: eliyar917@gmail.com
@blog: https://eliyar.biz
@version: 1.0
@license: Apache Licence
@file: w2v_visualizer.py
@time: 2017/7/30 上午9:37
"""
import sys
import os
import pathlib
import numpy as np
from gensim.models.keyedvectors import KeyedVectors
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
def visualize(model, output_path):
    """Export word vectors so TensorBoard's Embedding Projector can show them.

    Three kinds of artifacts are written into ``output_path``: a metadata
    file (one word per line), a checkpoint holding the embedding matrix, and
    the summary/projector files produced through the ``FileWriter``.

    Args:
        model: Object exposing ``wv.index2word``, ``vector_size`` and
            ``model[word]`` lookup, as used below.
        output_path: Directory that receives the generated files. It is
            created if it does not exist yet.
    """
    meta_file = "w2x_metadata.tsv"

    # Callers that skip the command-line entry point would otherwise hit a
    # FileNotFoundError when the metadata file is opened.
    os.makedirs(output_path, exist_ok=True)

    words = model.wv.index2word
    placeholder = np.zeros((len(words), model.vector_size))

    with open(os.path.join(output_path, meta_file), 'wb') as file_metadata:
        for i, word in enumerate(words):
            placeholder[i] = model[word]
            # temporary solution for https://github.com/tensorflow/tensorflow/issues/9094
            if word == '':
                print("Emply Line, should replecaed by any thing else, or will cause a bug of tensorboard")
                word = '<Empty Line>'
            file_metadata.write(word.encode('utf-8') + b'\n')

    # Build the variable in a private graph: in a shared default graph a
    # second call would get the name 'w2x_metadata_1' while the projector
    # config below still refers to 'w2x_metadata'.
    with tf.Graph().as_default():
        # define the model without training
        tf.Variable(placeholder, trainable=False, name='w2x_metadata')
        saver = tf.train.Saver()

        # The context manager closes the session when we are done.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter(output_path, sess.graph)
            try:
                # adding into projector
                config = projector.ProjectorConfig()
                embed = config.embeddings.add()
                embed.tensor_name = 'w2x_metadata'
                # Path is relative to the log directory.
                embed.metadata_path = meta_file
                projector.visualize_embeddings(writer, config)
                saver.save(sess, os.path.join(output_path, 'w2x_metadata.ckpt'))
            finally:
                # Flush and release the event file even if saving fails.
                writer.close()

    print('Run `tensorboard --logdir={0}` to run visualize result on tensorboard'.format(output_path))
if __name__ == "__main__":
    # Usage:
    #   1. Save the model with `model.save_word2vec_format` (word2vec format).
    #   2. Run `python w2v_visualizer.py word2vec.text visualize_result`.
    #
    # Stop right away when arguments are missing; continuing would only fail
    # later with a NameError on `model_path`.
    if len(sys.argv) < 3:
        print("Please provice model path and output path")
        sys.exit(1)

    model_path = sys.argv[1]
    output_path = sys.argv[2]

    model = KeyedVectors.load_word2vec_format(model_path)
    pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
    visualize(model, output_path)
@deletemyaccount

This comment has been minimized.

Copy link

deletemyaccount commented Jan 9, 2018

Thanks a lot :)

@MarcSzafraniec

This comment has been minimized.

Copy link

MarcSzafraniec commented Jan 12, 2018

You're the best!

@Behrad3d

This comment has been minimized.

Copy link

Behrad3d commented Feb 26, 2018

This work very well. Thanks for sharing it buddy :)

@ltjds

This comment has been minimized.

Copy link

ltjds commented Sep 13, 2018

This is awesome. At first, when I ran it, I had problems with my TensorFlow build (i.e, I got Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA), so I used this resource to make sure my build was up to date.

Then, I also had an issue with the function not finding the .tsv file created (i.e., I got FileNotFoundError: [Errno 2] No such file or directory: 'visualize_result/w2x_metadata.tsv'). So, I had to update the code a bit with this snippet:

# [...]
def visualize(model, output_path):
    meta_file = "w2x_metadata.tsv"
    placeholder = np.zeros((len(model.wv.index2word), model.vector_size))    # 'model.vector_size' used to be '100'
    # I needed to change '100' to 'model.vector_size' to accommodate generalized sizes of word vectors.
    try:
        os.mkdir(output_path)
    except FileExistsError:
        pass

    # Nothing changed below this point.
    with open(os.path.join(output_path,meta_file), 'wb') as file_metadata:
    # [...] 
@BrikerMan

This comment has been minimized.

Copy link
Owner Author

BrikerMan commented Mar 12, 2019

This is awesome. At first, when I ran it, I had problems with my TensorFlow build (i.e, I got Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA), so I used this resource to make sure my build was up to date.

Then, I also had an issue with the function not finding the .tsv file created (i.e., I got FileNotFoundError: [Errno 2] No such file or directory: 'visualize_result/w2x_metadata.tsv'). So, I had to update the code a bit with this snippet:

# [...]
def visualize(model, output_path):
    meta_file = "w2x_metadata.tsv"
    placeholder = np.zeros((len(model.wv.index2word), model.vector_size))    # 'model.vector_size' used to be '100'
    # I needed to change '100' to 'model.vector_size' to accommodate generalized sizes of word vectors.
    try:
        os.mkdir(output_path)
    except FileExistsError:
        pass

    # Nothing changed below this point.
    with open(os.path.join(output_path,meta_file), 'wb') as file_metadata:
    # [...] 

Thanks~ I have updated my code~

@wjcsz

This comment has been minimized.

Copy link

wjcsz commented Jul 15, 2019

Works with fastText after changing model = KeyedVectors.load_word2vec_format(model_path) to model = FT.load(model_path). Great job.

@sherringzhang

This comment has been minimized.

Copy link

sherringzhang commented May 4, 2020

may i know which tensorflow version do you use ?

@BrikerMan

This comment has been minimized.

Copy link
Owner Author

BrikerMan commented May 4, 2020

may i know which tensorflow version do you use ?

1.14.0, 1.15.0

@BrikerMan

This comment has been minimized.

Copy link
Owner Author

BrikerMan commented May 16, 2020

@sherringzhang try the v2 version, no need to use tensorflow.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.