@ngopal · Created April 25, 2019
Loading pre-trained vectors into Keras models
# The first step is to load the pre-trained vectors into Python. The example
# below uses GloVe data.
import os
import numpy as np

GLOVE_DIR = "/path/to/pretrained/embeddings/glove.6B/"

# Each line of the GloVe file is a word followed by its vector components.
embeddings_index = {}
with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'), "r", encoding="utf-8") as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

# Build the embedding matrix: one row per vocabulary word, plus row 0, since
# Tokenizer indices start at 1. `word_index` is assumed to come from a Keras
# Tokenizer fitted on your corpus (see the sketch below).
EMBEDDING_DIM = 100
embedding_matrix = np.random.random((len(word_index) + 1, EMBEDDING_DIM))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
    # Words not found in the embedding index keep their random initialization.
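
# A minimal sketch of where `word_index` typically comes from, assuming the
# corpus is a list of strings named `texts` (a hypothetical placeholder):
from keras.preprocessing.text import Tokenizer

texts = ["the cat sat on the mat", "the dog ate my homework"]  # placeholder corpus

tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index  # maps word -> integer index, starting at 1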
# The second step is to load this matrix into a Keras Embedding layer.
# trainable=False freezes the pre-trained weights during training.
from keras.layers import Embedding

MAX_SEQUENCE_LENGTH = 1000
embedding_layer = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False)
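
# For completeness, a minimal sketch of using the frozen layer in a model.
# The classifier head and the pad_sequences step here are assumptions, not
# part of the original gist:
from keras.models import Sequential
from keras.layers import GlobalAveragePooling1D, Dense
from keras.preprocessing.sequence import pad_sequences

sequences = tokenizer.texts_to_sequences(texts)              # texts -> integer ids
data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)  # pad/truncate rows

model = Sequential([
    embedding_layer,
    GlobalAveragePooling1D(),
    Dense(1, activation='sigmoid'),  # hypothetical binary-classification head
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])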