Skip to content

Instantly share code, notes, and snippets.

@amankharwal
Created August 21, 2020 05:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amankharwal/d291c945b55427d743807df39a21b20a to your computer and use it in GitHub Desktop.
Save amankharwal/d291c945b55427d743807df39a21b20a to your computer and use it in GitHub Desktop.
# Load the three dataset splits. The result of each preprocessData call is
# unpacked into a (texts, labels) pair of module-level names.
# NOTE(review): dev.txt and test.txt are also read with mode="train" --
# presumably so that labels are returned for those splits as well; confirm
# against preprocessData, which is defined outside this section.
texts_train, labels_train = preprocessData('train.txt', mode="train")
texts_dev, labels_dev = preprocessData('dev.txt', mode="train")
texts_test, labels_test = preprocessData('test.txt', mode="train")
def getEmbeddings(file):
    """Read a whitespace-separated word-embedding text file.

    Each non-blank line is expected to hold a token followed by its vector
    components, e.g. ``word 0.1 0.2 0.3``. If a token occurs more than once,
    the last occurrence wins.

    Args:
        file: Path of the embeddings file; it is opened as UTF-8 text.

    Returns:
        A tuple ``(embeddingsIndex, dim)`` where ``embeddingsIndex`` maps each
        token to a float32 NumPy vector and ``dim`` is the length of the last
        vector read (0 when the file contains no vectors).

    Raises:
        ValueError: If a vector component cannot be parsed as a float.
    """
    embeddingsIndex = {}
    dim = 0
    with io.open(file, encoding="utf8") as f:
        for line in f:
            values = line.split()
            if not values:
                # Blank or whitespace-only lines carry no token; indexing
                # values[0] on them would raise IndexError.
                continue
            word = values[0]
            embeddingVector = np.asarray(values[1:], dtype='float32')
            embeddingsIndex[word] = embeddingVector
            dim = len(embeddingVector)
    return embeddingsIndex, dim
def getEmbeddingMatrix(wordIndex, embeddings, dim):
    """Build a dense embedding matrix indexed by the vocabulary's word ids.

    Args:
        wordIndex: Mapping from word to integer row index.
        embeddings: Mapping from word to its embedding vector of length
            ``dim``.
        dim: Number of columns (embedding dimensionality) of the matrix.

    Returns:
        A NumPy array of shape ``(len(wordIndex) + 1, dim)``. Row ``i`` holds
        the vector of the word whose index is ``i``; rows for words missing
        from ``embeddings`` (and any index not present in ``wordIndex``)
        remain all zeros.
    """
    # One extra row beyond the vocabulary size -- presumably because indices
    # start at 1 with 0 reserved for padding; confirm against the tokenizer.
    embeddingMatrix = np.zeros((len(wordIndex) + 1, dim))
    for word, i in wordIndex.items():
        embeddingVector = embeddings.get(word)
        if embeddingVector is not None:
            # Out-of-vocabulary words keep their zero row. Assigning the None
            # returned by .get() would instead put NaN into the matrix.
            embeddingMatrix[i] = embeddingVector
    return embeddingMatrix
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment