# sismetanin/emocontext-embeddings.py

## emocontext-embeddings.py
def getEmbeddings(file):
    """Load word vectors from a whitespace-separated text file.

    Every non-blank line is split on whitespace: the first field is taken as
    the word and the remaining fields as the components of its vector.

    Args:
        file: Path of the embeddings file; it is opened as UTF-8 text.

    Returns:
        A ``(embeddingsIndex, dim)`` tuple. ``embeddingsIndex`` maps each
        word to a ``float32`` NumPy vector; ``dim`` is the length of the
        vector on the last parsed line (``0`` when no line was parsed).

    Raises:
        ValueError: If a vector component cannot be converted to a float.
    """
    embeddingsIndex = {}
    dim = 0
    with io.open(file, encoding="utf8") as f:
        for line in f:
            values = line.split()
            if not values:
                # Blank or whitespace-only lines (e.g. a trailing newline)
                # carry no word; values[0] would raise IndexError.
                continue
            word = values[0]
            embeddingVector = np.asarray(values[1:], dtype='float32')
            embeddingsIndex[word] = embeddingVector
            dim = len(embeddingVector)
    return embeddingsIndex, dim


def getEmbeddingMatrix(wordIndex, embeddings, dim):
    """Build an embedding matrix whose rows follow the given word indices.

    Args:
        wordIndex: Mapping from word to its integer row index.
        embeddings: Mapping from word to its embedding vector.
        dim: Length of every embedding vector.

    Returns:
        A NumPy array of shape ``(len(wordIndex) + 1, dim)``. Row ``i`` holds
        the vector of the word indexed ``i``; any row without a known vector
        is left as zeros.
    """
    # One extra row so that an index equal to len(wordIndex) is still valid.
    embeddingMatrix = np.zeros((len(wordIndex) + 1, dim))
    for word, i in wordIndex.items():
        embeddingVector = embeddings.get(word)
        # A word missing from ``embeddings`` yields None; assigning None to a
        # float row would store NaN, so such rows are kept at zero instead.
        if embeddingVector is not None:
            embeddingMatrix[i] = embeddingVector
    return embeddingMatrix


from keras.preprocessing.text import Tokenizer

# Tokenizer with character filtering disabled, so words are kept as written
# apart from the tokenizer's own defaults.
tokenizer = Tokenizer(filters='')

# Read the pretrained vectors, then fit the tokenizer on one synthetic
# "document" made of every word in the embeddings, joined by spaces.
embeddings, dim = getEmbeddings('emosense.300d.txt')
tokenizer.fit_on_texts([' '.join(embeddings.keys())])

wordIndex = tokenizer.word_index
print("Found %s unique tokens." % len(wordIndex))

# Matrix with one row per tokenizer index, built from the loaded vectors.
embeddings_matrix = getEmbeddingMatrix(wordIndex, embeddings, dim)
	def getEmbeddings(file):
		"""Load word vectors from a whitespace-separated text file.

		Every non-blank line is split on whitespace: the first field is taken
		as the word and the remaining fields as the components of its vector.

		Args:
			file: Path of the embeddings file; it is opened as UTF-8 text.

		Returns:
			A ``(embeddingsIndex, dim)`` tuple. ``embeddingsIndex`` maps each
			word to a ``float32`` NumPy vector; ``dim`` is the length of the
			vector on the last parsed line (``0`` when no line was parsed).

		Raises:
			ValueError: If a vector component cannot be converted to a float.
		"""
		embeddingsIndex = {}
		dim = 0
		with io.open(file, encoding="utf8") as f:
			for line in f:
				values = line.split()
				if not values:
					# Blank or whitespace-only lines carry no word;
					# values[0] would raise IndexError.
					continue
				word = values[0]
				embeddingVector = np.asarray(values[1:], dtype='float32')
				embeddingsIndex[word] = embeddingVector
				dim = len(embeddingVector)
		return embeddingsIndex, dim


	def getEmbeddingMatrix(wordIndex, embeddings, dim):
		"""Build an embedding matrix whose rows follow the given word indices.

		Args:
			wordIndex: Mapping from word to its integer row index.
			embeddings: Mapping from word to its embedding vector.
			dim: Length of every embedding vector.

		Returns:
			A NumPy array of shape ``(len(wordIndex) + 1, dim)``. Row ``i``
			holds the vector of the word indexed ``i``; any row without a
			known vector is left as zeros.
		"""
		# One extra row so that an index equal to len(wordIndex) is valid.
		embeddingMatrix = np.zeros((len(wordIndex) + 1, dim))
		for word, i in wordIndex.items():
			embeddingVector = embeddings.get(word)
			# A missing word yields None; assigning None to a float row would
			# store NaN, so such rows are kept at zero instead.
			if embeddingVector is not None:
				embeddingMatrix[i] = embeddingVector
		return embeddingMatrix


	from keras.preprocessing.text import Tokenizer

	# Tokenizer with character filtering disabled.
	tokenizer = Tokenizer(filters='')

	# Read the pretrained vectors, then fit the tokenizer on one synthetic
	# "document" made of every word in the embeddings, joined by spaces.
	embeddings, dim = getEmbeddings('emosense.300d.txt')
	tokenizer.fit_on_texts([' '.join(embeddings.keys())])

	wordIndex = tokenizer.word_index
	print("Found %s unique tokens." % len(wordIndex))

	# Matrix with one row per tokenizer index, built from the loaded vectors.
	embeddings_matrix = getEmbeddingMatrix(wordIndex, embeddings, dim)