Skip to content

Instantly share code, notes, and snippets.

@brandonrobertz
Last active October 9, 2018 09:29
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save brandonrobertz/49424db4164edb0d8ab34f16a3b742d5 to your computer and use it in GitHub Desktop.
Save brandonrobertz/49424db4164edb0d8ab34f16a3b742d5 to your computer and use it in GitHub Desktop.
Keras Skipgram Embedding (using pretrained FastText vectors)
# coding: utf-8
from __future__ import print_function
import numpy as np
from keras.models import Sequential
from keras.layers import Embedding
# Number of token ids fed to the Embedding layer per sample.
window_size = 1

# Pretrained skipgram vectors in fastText's text (.vec) format, built with:
#   fasttext skipgram -input dataset -output dataset.skipgram
# The first line is split into two header fields (samples, dim); every
# following line is "<word> <v1> <v2> ...".
with open('data/dataset.skipgram.vec', 'r') as f:
    data = f.readlines()

# Maps each word to its vector as a float32 NumPy array.
word_vectors = {}

# Header fields are kept as strings; they are converted with int() where used.
samples, dim = data[0].split()

for line in data[1:]:
    if not line.strip():
        # A blank line would make the two-way unpack below raise ValueError.
        continue
    # Split off the word only; the remainder holds the vector components.
    word, vec = line.split(' ', 1)
    word_vectors[word] = np.array(
        [float(i) for i in vec.split()], dtype='float32'
    )
# Fixes the row order of the embedding matrix: the word at position i gets
# token id i. list(...) is used because dict.keys() is a non-indexable view
# on Python 3, while word_index is subscripted further down the script.
word_index = list(word_vectors)

n_dim = int(dim)

# One row per loaded word, so the matrix always matches the
# Embedding(len(word_index), dim) layer it is handed to, even if the header
# count disagrees with the number of distinct words actually read.
E = np.zeros(shape=(len(word_index), n_dim), dtype='float32')
for ix, word in enumerate(word_index):
    # Copy the whole vector in one row assignment; components beyond the
    # declared dimension are ignored, as in the element-wise copy.
    E[ix] = word_vectors[word][:n_dim]
# Lookup layer initialised from the pretrained matrix E and frozen
# (trainable=False) so the vectors are not updated.
vocab_size = len(word_index)
embedding_dim = int(dim)
embedding = Embedding(
    vocab_size,
    embedding_dim,
    weights=[E],
    input_length=window_size,
    trainable=False,
)

# The model consists of the embedding layer only: token ids in, vectors out.
model = Sequential()
model.add(embedding)
model.compile(optimizer='sgd', loss='mse', metrics=['accuracy'])
# Sanity check: looking up token id 0 through the model should return the
# pretrained vector of the first word in word_index.
pred = model.predict(np.array([[0]]))
# First sample, first (and only) position of the input window.
p = pred[0][0]
a = word_vectors[word_index[0]]
print("Predicted embedding vector", p)
print("Actual embedding vector", a)
# Collapse the comparison to one boolean; `p == a` would print an
# element-wise array rather than answer the question.
print("Equal?", np.array_equal(p, a))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment