Last active
August 13, 2019 20:23
-
-
Save dermatologist/d1305ab6d4387f315437fe853a0ce223 to your computer and use it in GitHub Desktop.
Convert pre-trained cui2vec embeddings to GloVe and word2vec formats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import os

from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec
def csv_fields_to_glove_line(fields):
    """Join one parsed CSV row into a space-separated GloVe text line.

    Args:
        fields: The cells of one row as produced by ``csv.reader`` (quoting
            already removed). The first cell is the token, the remaining
            cells are the vector components.

    Returns:
        The cells joined by single spaces, with any whitespace inside a cell
        removed and without a trailing newline.
    """
    # A space is the column separator in the output, so whitespace must not
    # survive inside a cell.
    return " ".join("".join(field.split()) for field in fields)


def write_glove_text(csv_lines, out):
    """Convert CSV lines (header row first) into GloVe text lines.

    Args:
        csv_lines: Iterable of CSV text lines, e.g. a file opened with
            ``newline=''``. The first row is treated as a header and dropped.
        out: Writable text stream that receives one line per vector row.

    Returns:
        The number of vector rows written.
    """
    reader = csv.reader(csv_lines)
    # Skip the header row; the default keeps an empty input from raising
    # StopIteration.
    next(reader, None)

    count = 0
    for fields in reader:
        # Blank or whitespace-only rows carry no vector; writing them would
        # put an empty row into the GloVe file.
        if not any(field.strip() for field in fields):
            continue
        out.write(csv_fields_to_glove_line(fields) + "\n")
        count += 1
    return count


def main():
    """Run one phase of the cui2vec conversion, chosen by what is on disk.

    If ``cui2vec_w.txt`` (word2vec text format) already exists, it is loaded,
    the two nearest neighbours of ``C0000052`` are printed as a sanity check,
    and the vectors are saved in binary form to ``cui2vec_w.bin``.

    Otherwise ``cui2vec_pretrained.csv`` is rewritten as GloVe text in
    ``cui2vec_g.txt``, that file is converted to ``cui2vec_w.txt`` with
    ``glove2word2vec``, and the number of converted rows is printed. Run the
    script a second time to produce the binary file.
    """
    if os.path.exists('cui2vec_w.txt'):
        wv_from_text = KeyedVectors.load_word2vec_format(
            'cui2vec_w.txt', unicode_errors='ignore')
        print(wv_from_text.most_similar("C0000052", topn=2))
        wv_from_text.save_word2vec_format('cui2vec_w.bin', binary=True)
        return

    # newline='' is what the csv module expects for its input file.
    with open("cui2vec_pretrained.csv", 'r', newline='',
              encoding='utf-8') as f, \
            open("cui2vec_g.txt", 'w', encoding='utf-8') as f1:
        count = write_glove_text(f, f1)

    # Convert only after both files are closed, so the GloVe text is fully
    # flushed to disk before it is read back.
    glove2word2vec('cui2vec_g.txt', 'cui2vec_w.txt')
    print(count)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment