Instantly share code, notes, and snippets.

Embed
What would you like to do?
Convert a gensim word2vec model into a model that can be visualized in TensorBoard. Details: https://eliyar.biz/using-pre-trained-gensim-word2vector-in-a-keras-model-and-visualizing/
# encoding: utf-8
"""
@author: BrikerMan
@contact: eliyar917@gmail.com
@blog: https://eliyar.biz
@version: 1.0
@license: Apache Licence
@file: w2v_visualizer.py
@time: 2017/7/30 上午9:37
"""
import sys, os
from gensim.models import Word2Vec
import tensorflow as tf
import numpy as np
from tensorflow.contrib.tensorboard.plugins import projector
def visualize(model, output_path):
    """Export a gensim Word2Vec model for the TensorBoard embedding projector.

    Writes into ``output_path`` a metadata file with one vocabulary word per
    line, a TensorFlow checkpoint holding the embedding matrix, and the
    projector configuration that ties the two together.

    Args:
        model: Loaded gensim ``Word2Vec`` model; ``model.wv`` supplies the
            vocabulary order and the word vectors.
        output_path: Directory that receives the generated files. It is
            created if it does not exist yet.
    """
    meta_file = "w2x_metadata.tsv"

    # gensim 4 renamed ``index2word`` to ``index_to_key``; accept either.
    vocab = getattr(model.wv, 'index_to_key', None)
    if vocab is None:
        vocab = model.wv.index2word

    # Create the target directory up front so opening the metadata file below
    # cannot fail with "No such file or directory".
    try:
        os.makedirs(output_path)
    except OSError:
        # An already-existing directory is fine; anything else is a real error.
        if not os.path.isdir(output_path):
            raise

    # Size the matrix from the model instead of assuming 100 dimensions.
    placeholder = np.zeros((len(vocab), model.vector_size))
    with open(os.path.join(output_path, meta_file), 'wb') as file_metadata:
        for i, word in enumerate(vocab):
            # Index through ``wv``: ``model[word]`` is deprecated in gensim.
            placeholder[i] = model.wv[word]
            # temporary solution for https://github.com/tensorflow/tensorflow/issues/9094
            if word == '':
                print("Emply Line, should replecaed by any thing else, or will cause a bug of tensorboard")
                file_metadata.write(b'<Empty Line>\n')
            else:
                file_metadata.write("{0}".format(word).encode('utf-8') + b'\n')

    # define the model without training
    sess = tf.InteractiveSession()
    try:
        tf.Variable(placeholder, trainable=False, name='w2x_metadata')
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(output_path, sess.graph)
        try:
            # adding into projector
            config = projector.ProjectorConfig()
            embed = config.embeddings.add()
            embed.tensor_name = 'w2x_metadata'
            # Relative path: the metadata file sits next to the checkpoint.
            embed.metadata_path = meta_file
            projector.visualize_embeddings(writer, config)
            saver.save(sess, os.path.join(output_path, 'w2x_metadata.ckpt'))
        finally:
            # Flush the event file and release the handle.
            writer.close()
    finally:
        sess.close()

    print('Run `tensorboard --logdir={0}` to run visualize result on tensorboard'.format(output_path))
if __name__ == "__main__":
    # Usage: python w2v_visualizer.py word2vec.model visualize_result
    try:
        model_path = sys.argv[1]
        output_path = sys.argv[2]
    except IndexError:
        # Missing arguments: report and stop instead of falling through to a
        # NameError on the undefined paths below.
        print("Please provice model path and output path")
        sys.exit(1)
    model = Word2Vec.load(model_path)
    visualize(model, output_path)
@saeedbibak

This comment has been minimized.

saeedbibak commented Jan 9, 2018

Thanks a lot :)

@MarcSzafraniec

This comment has been minimized.

MarcSzafraniec commented Jan 12, 2018

You're the best!

@Behrad3d

This comment has been minimized.

Behrad3d commented Feb 26, 2018

This works very well. Thanks for sharing it buddy :)

@ltjds

This comment has been minimized.

ltjds commented Sep 13, 2018

This is awesome. At first, when I ran it, I had problems with my TensorFlow build (i.e, I got Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA), so I used this resource to make sure my build was up to date.

Then, I also had an issue with the function not finding the .tsv file created (i.e., I got FileNotFoundError: [Errno 2] No such file or directory: 'visualize_result/w2x_metadata.tsv'). So, I had to update the code a bit with this snippet:

# [...]
def visualize(model, output_path):
    meta_file = "w2x_metadata.tsv"
    placeholder = np.zeros((len(model.wv.index2word), model.vector_size))    # 'model.vector_size' used to be '100'
    # I needed to change '100' to 'model.vector_size' to accommodate generalized sizes of word vectors.
    try:
        os.mkdir(output_path)
    except FileExistsError:
        pass

    # Nothing changed below this point.
    with open(os.path.join(output_path,meta_file), 'wb') as file_metadata:
    # [...] 
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment