Instantly share code, notes, and snippets.

What would you like to do?
Convert gensim word2vec to tensorboard visualized model, detail:
# encoding: utf-8
# @author: BrikerMan
# @version: 1.0
# @license: Apache Licence
# @time: 2017/7/30 9:37 AM
import sys, os
from gensim.models import Word2Vec
import tensorflow as tf
import numpy as np
from tensorflow.contrib.tensorboard.plugins import projector
def visualize(model, output_path):
    """Export a gensim Word2Vec model for the TensorBoard embedding projector.

    Writes into ``output_path``:

    * ``w2x_metadata.tsv`` -- one vocabulary word per line, in the same order
      as the rows of the embedding matrix;
    * ``w2x_metadata.ckpt`` -- a TensorFlow checkpoint holding the embedding
      matrix as a non-trainable variable;
    * the projector configuration linking the variable to the metadata file.

    Args:
        model: A loaded gensim ``Word2Vec`` model.
        output_path: Directory to write to. It is created if it is missing.
    """
    meta_file = "w2x_metadata.tsv"
    vocabulary = model.wv.index2word
    # Size the matrix from the model rather than assuming 100 dimensions.
    placeholder = np.zeros((len(vocabulary), model.vector_size))

    # open() does not create missing directories, so make sure it exists.
    if not os.path.isdir(output_path):
        os.makedirs(output_path)

    with open(os.path.join(output_path, meta_file), 'wb') as file_metadata:
        for i, word in enumerate(vocabulary):
            placeholder[i] = model.wv[word]
            # Exactly one metadata line must be written per vector, otherwise
            # the labels shift relative to the embedding rows. An empty label
            # breaks TensorBoard, so it is swapped for a visible marker.
            if word == '':
                print("Emply Line, should replecaed by any thing else, or will cause a bug of tensorboard")
                label = '<Empty Line>'
            else:
                label = word
            file_metadata.write("{0}".format(label).encode('utf-8') + b'\n')

    # Define the model without training: the variable only carries the
    # pre-computed vectors into a checkpoint.
    sess = tf.InteractiveSession()
    embedding = tf.Variable(placeholder, trainable=False, name='w2x_metadata')
    # The variable has to be initialised before it can be saved.
    tf.global_variables_initializer().run()

    saver = tf.train.Saver()
    writer = tf.summary.FileWriter(output_path, sess.graph)

    # Register the embedding with the projector plugin.
    config = projector.ProjectorConfig()
    embed = config.embeddings.add()
    # Use the variable's actual name so the config stays correct even if
    # TensorFlow had to uniquify it.
    embed.tensor_name = embedding.name
    # Relative to the log directory, where the metadata file was written.
    embed.metadata_path = meta_file

    projector.visualize_embeddings(writer, config)
    saver.save(sess, os.path.join(output_path, 'w2x_metadata.ckpt'))

    # Flush the event file and release the session.
    writer.close()
    sess.close()

    print('Run `tensorboard --logdir={0}` to run visualize result on tensorboard'.format(output_path))
if __name__ == "__main__":
    # Usage: python <this script> word2vec.model visualize_result
    #   argv[1] -- path to a saved gensim Word2Vec model
    #   argv[2] -- directory that receives the TensorBoard files
    if len(sys.argv) < 3:
        print("Please provice model path and output path")
        # Stop here: without both paths there is nothing to load or write.
        sys.exit(1)

    model_path = sys.argv[1]
    output_path = sys.argv[2]

    model = Word2Vec.load(model_path)
    visualize(model, output_path)

This comment has been minimized.

saeedbibak commented Jan 9, 2018

Thanks a lot :)


This comment has been minimized.

MarcSzafraniec commented Jan 12, 2018

You're the best!


This comment has been minimized.

Behrad3d commented Feb 26, 2018

This works very well. Thanks for sharing it buddy :)


This comment has been minimized.

ltjds commented Sep 13, 2018

This is awesome. At first, when I ran it, I had problems with my TensorFlow build (i.e., I got "Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA"), so I used this resource to make sure my build was up to date.

Then, I also had an issue with the function not finding the .tsv file created (i.e., I got FileNotFoundError: [Errno 2] No such file or directory: 'visualize_result/w2x_metadata.tsv'). So, I had to update the code a bit with this snippet:

# [...]
def visualize(model, output_path):
    meta_file = "w2x_metadata.tsv"
    placeholder = np.zeros((len(model.wv.index2word), model.vector_size))    # 'model.vector_size' used to be '100'
    # I needed to change '100' to 'model.vector_size' to accommodate generalized sizes of word vectors.
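    # Create the output folder first so the .tsv file can be written into it.
    try:
        os.mkdir(output_path)
    except FileExistsError:
        pass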

    # Nothing changed below this point.
    with open(os.path.join(output_path,meta_file), 'wb') as file_metadata:
    # [...] 
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment