Skip to content

Instantly share code, notes, and snippets.

@sismetanin
Created August 1, 2019 12:32
Show Gist options
  • Save sismetanin/836057bde8090f2e00e6c07066ef65d8 to your computer and use it in GitHub Desktop.
Save sismetanin/836057bde8090f2e00e6c07066ef65d8 to your computer and use it in GitHub Desktop.
def getEmbeddings(file):
    """Load word vectors from a whitespace-separated text file.

    Each non-blank line is split on whitespace: the first token is taken as
    the word and every remaining token is parsed as a float32 component of
    its vector.

    Args:
        file: Path of the UTF-8 encoded embeddings file.

    Returns:
        A ``(embeddingsIndex, dim)`` tuple where ``embeddingsIndex`` maps each
        word to its ``float32`` NumPy vector and ``dim`` is the length of the
        last vector read (``0`` when the file holds no vectors).

    Raises:
        ValueError: If a vector component cannot be parsed as a float.
    """
    embeddingsIndex = {}
    dim = 0
    with io.open(file, encoding="utf8") as f:
        for line in f:
            values = line.split()
            if not values:
                # Blank or whitespace-only lines (e.g. a trailing newline at
                # the end of the file) carry no word; indexing values[0]
                # would raise IndexError, so skip them.
                continue
            word = values[0]
            embeddingVector = np.asarray(values[1:], dtype='float32')
            # A word that appears more than once keeps its last vector.
            embeddingsIndex[word] = embeddingVector
            dim = len(embeddingVector)
    return embeddingsIndex, dim
def getEmbeddingMatrix(wordIndex, embeddings, dim):
    """Build a matrix whose row ``i`` is the vector of the word indexed ``i``.

    Args:
        wordIndex: Mapping from word to integer row index.
        embeddings: Mapping from word to its embedding vector.
        dim: Number of columns of the matrix (the embedding dimensionality).

    Returns:
        A NumPy array of shape ``(len(wordIndex) + 1, dim)``. Rows for words
        that have no entry in ``embeddings`` (and any row no word maps to)
        stay all-zero.
    """
    embeddingMatrix = np.zeros((len(wordIndex) + 1, dim))
    for word, i in wordIndex.items():
        embeddingVector = embeddings.get(word)
        if embeddingVector is not None:
            # Only copy known vectors; assigning the None returned for a
            # missing word would corrupt the row instead of leaving it zero.
            embeddingMatrix[i] = embeddingVector
    return embeddingMatrix
from keras.preprocessing.text import Tokenizer
# Read the pre-trained vectors and their dimensionality from disk.
embeddings, dim = getEmbeddings('emosense.300d.txt')

# Fit the tokenizer on the embedding vocabulary itself (all words joined
# into one space-separated text) so that every vocabulary word gets an index.
# NOTE(review): Tokenizer is left at its defaults apart from filters='';
# presumably it still lowercases input, so mixed-case vocabulary entries may
# collapse into one index -- confirm against the Keras documentation.
tokenizer = Tokenizer(filters='')
tokenizer.fit_on_texts([' '.join(embeddings)])
wordIndex = tokenizer.word_index
print("Found %s unique tokens." % (len(wordIndex),))

# One matrix row per tokenizer index, filled from the loaded vectors.
embeddings_matrix = getEmbeddingMatrix(wordIndex, embeddings, dim)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment