Skip to content

Instantly share code, notes, and snippets.

@pillyshi
Created May 28, 2019 16:36
Show Gist options
  • Save pillyshi/d2f6ecee80b53281b08fea4f85fd1f84 to your computer and use it in GitHub Desktop.
Save pillyshi/d2f6ecee80b53281b08fea4f85fd1f84 to your computer and use it in GitHub Desktop.
import training
from gensim.models import Word2Vec
import numpy as np
from sklearn.linear_model import LogisticRegressionCV
from sklearn import metrics

# Train a logistic-regression classifier that, given the difference of two
# word vectors, predicts the order in which the pair appears in
# `training.data`, then report held-out accuracy.

# Seed NumPy's global RNG so the shuffled train/test split is reproducible.
np.random.seed(0)

model = Word2Vec.load("/path/to/your/w2v_model")
# Word vectors live on `model.wv`. Indexing and membership tests on the
# Word2Vec object itself (`model[w]`, `w in model`) were deprecated and then
# removed in gensim 4.0, so go through the KeyedVectors instead.
wv = model.wv

data = []  # word pairs, row-aligned with X and y
X = []     # difference vectors, one per ordered pair
y = []     # +1 for the order given in training.data, -1 for the reversed pair
for w1, w2 in training.data:
    # Skip pairs where either word has no vector in the model.
    if w1 in wv and w2 in wv:
        v1 = wv[w1]
        v2 = wv[w2]
        # Emit every usable pair in both orders so the two classes are
        # perfectly balanced.
        data.append([w1, w2])
        data.append([w2, w1])
        X.append(v1 - v2)
        X.append(v2 - v1)
        y.append(1)
        y.append(-1)
X = np.array(X)
y = np.array(y)

# Shuffle the rows and use the first 70% for training, the rest for testing.
# NOTE(review): rows are shuffled individually, so a pair and its mirrored
# (negated-vector, opposite-label) row can end up on different sides of the
# split. That probably makes the reported accuracy optimistic; consider
# splitting at the pair level instead.
idx = np.random.permutation(len(y))
ntr = int(len(y) * 0.7)
itr = idx[:ntr]
ite = idx[ntr:]
Xtr = X[itr]
ytr = y[itr]
Xte = X[ite]
yte = y[ite]

clf = LogisticRegressionCV().fit(Xtr, ytr)
ypred = clf.predict(Xte)
# The value in the trailing comment is the output recorded by the original
# author; it depends on their model and training data.
print(metrics.accuracy_score(yte, ypred))  # >>> 0.99502487562189057
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment