Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Convert pre-trained cui2vec embeddings to GloVe and word2vec formats
import os
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec
# Convert the pre-trained cui2vec embeddings between on-disk formats.
#
# When cui2vec_w.txt is absent: cui2vec_pretrained.csv is rewritten as
#   cui2vec_g.txt (space-separated rows, no header) and handed to
#   glove2word2vec, which produces cui2vec_w.txt.
# When cui2vec_w.txt is present: it is loaded, a sample similarity query is
#   printed, and the vectors are saved again as cui2vec_w.bin (binary).


def _csv_row_to_glove_line(line):
    """Turn one raw CSV line into a space-separated embedding row.

    All whitespace and double quotes are removed and every comma becomes a
    single space. Returns an empty string when nothing but separators is left
    (e.g. a blank line), so the caller can skip it.
    """
    compact = "".join(line.split())
    row = compact.replace('"', '').replace(',', ' ')
    return row if row.strip() else ""


if os.path.exists('cui2vec_w.txt'):
    wv_from_text = KeyedVectors.load_word2vec_format('cui2vec_w.txt', unicode_errors='ignore')
    # Sample query on one concept id to show the vectors loaded.
    print(wv_from_text.most_similar("C0000052", topn=2))
    wv_from_text.save_word2vec_format('cui2vec_w.bin', binary=True)
else:
    count = 0  # number of embedding rows written to cui2vec_g.txt
    with open("cui2vec_pretrained.csv", 'r') as f, open("cui2vec_g.txt", 'w') as f1:
        next(f)  # skip header line
        for line in f:
            row = _csv_row_to_glove_line(line)
            if not row:
                # A blank row would be counted as a vector by the converter
                # and break the generated word2vec file, so drop it.
                continue
            # Trailing space before the newline is kept so the output stays
            # byte-identical to files produced by earlier runs.
            f1.write(row + ' \n')
            count += 1
    # Convert only after both files are closed, so every row has been flushed
    # to disk before glove2word2vec reads cui2vec_g.txt.
    glove2word2vec('cui2vec_g.txt', 'cui2vec_w.txt')
    print(count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.