Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Read in word vectors and visualize them using t-SNE
import gensim
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
# Load the two embedding files to compare. Both are read with gensim's
# word2vec-format loader using its default settings.
# `model` holds the "rw" vectors (annotated with rw_dict when plotted).
model = gensim.models.KeyedVectors.load_word2vec_format(
    'MUSE/dumped/debug/8gywilp7r1/vectors-rw1.txt'
)
# `model2` holds the "lg" vectors (annotated with lg_dict when plotted).
model2 = gensim.models.KeyedVectors.load_word2vec_format(
    'MUSE/dumped/debug/8gywilp7r1/vectors-lg1.txt'
)
# Glossary for the "lg" vocabulary: source word -> English gloss.
# (Presumably Luganda, going by the "lg" file suffix -- confirm.)
# Used only to annotate plotted points; words that are missing here are
# labelled "null" by tsne_plot.
lg_dict = {
    "katonda": "god",
    "emu": "one",
    "kabaka": "king",
    "mutabani": "son",
    "mukazi": "son-in-law",
    "bantu": "people",
    "lukalu": "land",
    "ttaka": "land",
    "ettaka": "land",
    "nsi": "land",
    "olukalu": "land",
    "nnyumba": "house",
    "ennyumba": "house",
    "omulenzi": "son",
    "taata": "father",
    "ekintu": "thing",
    "engoye": "thing",
    "kintu": "thing",
    "mukono": "hand",
    "omukono": "hand",
    "kifo": "place",
    "ekifo": "place",
    "ekibuga": "city",
    "kibuga": "city",
    "ensi": "earth",
    "erinnya": "name",
    "lunaku": "day",
    "misana": "day",
    "emisana": "day",
    "olunaku": "day",
    "mutima": "heart",
    "ekigambo": "word",
    "kigambo": "word",
    "byombi": "two",
    "mannyo": "two",
    "ekikoola": "two",
    "erinnyo": "two",
    "liiso": "eye",
    "eriiso": "eye",
    "nnyindo": "nose",
    "ennyindo": "nose",
    "omukwano": "love",
    "mukwano": "love",
    "mwanyina": "brother",
    "kwagalana": "to-love-each-other",
    "omuntu": "man",
    "omusajja": "man",
    "musajja": "man",
}
# Glossary for the "rw" vocabulary: source word -> English gloss.
# (Presumably Kinyarwanda, going by the "rw" file suffix -- confirm.)
# Used only to annotate plotted points; words that are missing here are
# labelled "null" by tsne_plot.
rw_dict = {
    "imana": "god",
    "rimwe": "one",
    "umwami": "king",
    "mwene": "son",
    "umukwe": "son-in-law",
    "rubanda": "people",
    "isambu": "land",
    "igihugu": "land",
    "ubutaka": "land",
    "inzu": "house",
    "akazu": "house",
    "baba": "father",
    "ikintu": "thing",
    "akantu": "small-thing",
    "ikiganza": "hand",
    "umukono": "hand",
    "ahantu": "place",
    "igitaka": "earth",
    "isi": "earth",
    "izina": "name",
    "umunsi": "day",
    "umutima": "heart",
    "ijambo": "word",
    "ebyiri": "two",
    "kabiri": "two",
    "ijisho": "eye",
    "izuru": "nose",
    "ishyanga": "foreign-country",
    "urukundo": "love",
    "musaza": "brother-of-a-female",
    "umugambi": "plan",
    "umugambanyi": "traitor",
    "umusaza": "old-man",
    "umusore": "male-teenager",
    "data": "paternal-uncle",
    "sebukwe": "father-in-law-of-someone-else",
    "sobukwe": "father-in-law-of-person-being-spoken-to",
    "kurandata": "to-lead-by-the-hand",
    "gukunda": "to-like-love",
    "gukundana": "to-like-or-love-each-other",
    "gukundwa": "to-be-loved",
}
def _vocab_words(model):
    """Return ``(vectors, words)`` for a gensim model or KeyedVectors object."""
    # A full Word2Vec model keeps its vectors under ``.wv``; a bare
    # KeyedVectors object is used as-is.
    vectors = getattr(model, "wv", model)
    # gensim >= 4 replaced the ``vocab`` dict with ``key_to_index``; fall back
    # to ``vocab`` so older gensim releases keep working.
    vocab = getattr(vectors, "key_to_index", None)
    if vocab is None:
        vocab = vectors.vocab
    return vectors, list(vocab)


def _project_2d(vectors, words):
    """Run t-SNE over the vectors of *words* and return the 2-D coordinates."""
    import inspect

    import numpy as np

    # Hand scikit-learn a real 2-D array: recent releases read ``X.shape``
    # before validating the input, which fails on a plain Python list.
    matrix = np.asarray([vectors[word] for word in words])

    tsne_kwargs = dict(perplexity=50, n_components=2, init='pca', random_state=23)
    # scikit-learn 1.5 renamed ``n_iter`` to ``max_iter`` (and later removed
    # the old name), so pick whichever keyword the installed version accepts.
    accepted = inspect.signature(TSNE.__init__).parameters
    iter_kwarg = "max_iter" if "max_iter" in accepted else "n_iter"
    tsne_kwargs[iter_kwarg] = 2500
    return TSNE(**tsne_kwargs).fit_transform(matrix)


def _scatter_with_labels(points, words, glossary, color):
    """Draw *points* in *color* and annotate each one as ``word - gloss``."""
    # One scatter call per model instead of one per point.
    plt.scatter(points[:, 0], points[:, 1], c=color)
    for (x, y), word in zip(points, words):
        # Words without a glossary entry are labelled "null".
        plt.annotate(word + " - " + glossary.get(word, "null"),
                     xy=(x, y),
                     xytext=(5, 2),
                     textcoords='offset points',
                     ha='right',
                     va='bottom')


def tsne_plot(model1, model2):
    """Create a t-SNE projection for each model and plot both on one figure.

    Each model's vocabulary is projected to 2-D independently (a separate
    t-SNE fit per model, same hyper-parameters and random seed). Points from
    ``model1`` are drawn in blue and annotated with glosses from ``rw_dict``;
    points from ``model2`` are drawn in green and annotated with glosses from
    ``lg_dict``. Words missing from the glossary get the gloss ``"null"``.

    The figure is saved to ``shared-filtered.png`` in the current working
    directory and then shown.

    Args:
        model1: gensim ``KeyedVectors`` (or a model exposing ``.wv``) whose
            words are looked up in ``rw_dict``.
        model2: gensim ``KeyedVectors`` (or a model exposing ``.wv``) whose
            words are looked up in ``lg_dict``.

    Note:
        The perplexity is fixed at 50; scikit-learn requires it to be smaller
        than the number of words in each model.
    """
    vectors1, words1 = _vocab_words(model1)
    vectors2, words2 = _vocab_words(model2)

    points1 = _project_2d(vectors1, words1)
    points2 = _project_2d(vectors2, words2)

    plt.figure(figsize=(16, 16))
    _scatter_with_labels(points1, words1, rw_dict, 'b')
    _scatter_with_labels(points2, words2, lg_dict, 'g')
    plt.savefig('shared-filtered.png')
    plt.show()
# Script entry point: project and plot both loaded embedding sets
# (`model` is annotated with rw_dict, `model2` with lg_dict).
tsne_plot(model, model2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.