# eumesy/loadGloVe.py

## loadGloVe.py
def loadGloveModel(gloveFile):
    """Load GloVe embeddings from text lines into a ``word -> vector`` dict.

    Adapted from https://stackoverflow.com/a/38230349

    Each non-blank line is parsed as ``word v1 v2 ... vN`` with single-space
    separators. A progress message is written to stderr every 100000 input
    lines, and a summary once loading finishes.

    Args:
        gloveFile: Iterable of text lines, e.g. an open GloVe ``.txt`` file.

    Returns:
        dict mapping each word (``str``) to a 1-D ``numpy.ndarray`` of
        floats. If a word occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If a vector component cannot be parsed as a float.
    """
    print("Loading Glove Model", file=sys.stderr)
    model = {}
    for i, line in enumerate(gloveFile, start=1):
        if i % 100000 == 0:
            print("load {} data".format(i), file=sys.stderr)
        # Drop the line terminator and any trailing padding so a stray
        # space does not turn into an empty token that float() rejects.
        line = line.rstrip()
        if not line:
            # Blank lines (typically a trailing newline at EOF) would
            # otherwise add a bogus entry with an empty vector.
            continue
        word, *values = line.split(' ')
        model[word] = np.array([float(val) for val in values])
    print("Done.", len(model), " words loaded!", file=sys.stderr)
    return model

def loadGloveVectors(gloveFile):
    """Load only the embedding vectors from GloVe-formatted text lines.

    Each non-blank line is parsed as ``word v1 v2 ... vN`` with single-space
    separators; the leading word is discarded. Iteration is passed through
    ``tqdm`` (presumably for a progress bar), so that name must be available
    in the enclosing module.

    Args:
        gloveFile: Iterable of text lines, e.g. an open GloVe ``.txt`` file.

    Returns:
        ``numpy.ndarray`` of floats with one row per non-blank line, i.e.
        shape ``(n, dim)`` when every line carries ``dim`` components.

    Raises:
        ValueError: If a vector component cannot be parsed as a float.
    """
    # Strip line terminators / trailing padding up front so that blank
    # lines can be dropped; a blank line would otherwise contribute a
    # zero-length row and make the resulting array ragged.
    stripped = (line.rstrip() for line in tqdm(gloveFile))
    rows = [[float(val) for val in line.split(' ')[1:]]
            for line in stripped if line]
    return np.array(rows)
	def loadGloveModel(gloveFile):
		"""Build a ``word -> embedding`` dict from GloVe-formatted text lines.

		Adapted from https://stackoverflow.com/a/38230349

		Every non-blank line is read as ``word v1 v2 ... vN`` (single-space
		separated). Progress is reported on stderr every 100000 input lines,
		followed by a final count.

		Args:
			gloveFile: Iterable of text lines, e.g. an open GloVe ``.txt`` file.

		Returns:
			dict mapping each word (``str``) to a 1-D ``numpy.ndarray`` of
			floats. A repeated word keeps its last vector.

		Raises:
			ValueError: If a vector component cannot be parsed as a float.
		"""
		print("Loading Glove Model", file=sys.stderr)
		model = {}
		for i, line in enumerate(gloveFile, start=1):
			if i % 100000 == 0:
				print("load {} data".format(i), file=sys.stderr)
			# Remove the newline and trailing padding; an unstripped trailing
			# space would yield an empty token that float() cannot parse.
			line = line.rstrip()
			if not line:
				# Skip blank lines instead of storing an empty vector for them.
				continue
			splitLine = line.split(' ')
			word = splitLine[0]
			embedding = np.array([float(val) for val in splitLine[1:]])
			model[word] = embedding
		print("Done.", len(model), " words loaded!", file=sys.stderr)
		return model

	def loadGloveVectors(gloveFile):
		"""Return the embedding vectors from GloVe-formatted text lines.

		Every non-blank line is read as ``word v1 v2 ... vN`` (single-space
		separated) and the leading word is dropped. The input is iterated
		through ``tqdm`` (presumably for a progress bar), so that name must be
		available in the enclosing scope.

		Args:
			gloveFile: Iterable of text lines, e.g. an open GloVe ``.txt`` file.

		Returns:
			``numpy.ndarray`` of floats with one row per non-blank line, i.e.
			shape ``(n, dim)`` when every line carries ``dim`` components.

		Raises:
			ValueError: If a vector component cannot be parsed as a float.
		"""
		rows = []
		for line in tqdm(gloveFile):
			# Remove the newline / trailing padding before tokenizing.
			line = line.rstrip()
			if not line:
				# A blank line would add a zero-length row and make the
				# result ragged, so it is skipped.
				continue
			rows.append([float(val) for val in line.split(' ')[1:]])
		return np.array(rows)