Skip to content

Instantly share code, notes, and snippets.

@eumesy
Last active December 7, 2019 03:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eumesy/1912f9967ffefb4911444d1419cde2ac to your computer and use it in GitHub Desktop.
Save eumesy/1912f9967ffefb4911444d1419cde2ac to your computer and use it in GitHub Desktop.
def loadGloveModel(gloveFile):
    """Build a word -> embedding dictionary from GloVe-style text lines.

    Adapted from https://stackoverflow.com/a/38230349

    Each non-blank line is expected to hold a word followed by its vector
    components, all separated by single spaces.

    Args:
        gloveFile: An iterable of text lines, e.g. a file opened in text mode.

    Returns:
        dict mapping each word (str) to a 1-D numpy float array.  If a word
        occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If a vector component cannot be parsed as a float.
    """
    print("Loading Glove Model", file=sys.stderr)
    model = {}
    for i, line in enumerate(gloveFile, start=1):
        if i % 100000 == 0:
            print("load {} data".format(i), file=sys.stderr)
        # Drop the line terminator so it can never end up inside the word
        # (which happens when a line has no vector components).
        stripped = line.rstrip()
        if not stripped:
            # A blank line (commonly the trailing one) carries no entry.
            continue
        # Split on a single space only: tokens may themselves contain other
        # whitespace characters, so a bare split() would be unsafe.
        word, *values = stripped.split(' ')
        model[word] = np.array([float(val) for val in values])
    print("Done.",len(model)," words loaded!", file=sys.stderr)
    return model
def loadGloveVectors(gloveFile, skip_first_line=False):
    """Load only the embedding vectors from GloVe-style text lines.

    Each non-blank line is expected to hold a word followed by its vector
    components, separated by single spaces; the word is discarded.

    Args:
        gloveFile: An iterable of text lines, e.g. a file opened in text mode.
        skip_first_line: If True, discard the first line before parsing
            (presumably for files that start with a header line -- confirm
            against the data being loaded).

    Returns:
        numpy ndarray of shape (n, dim) when every line has the same number
        of components.
    """
    if skip_first_line:
        # iter() lets plain sequences work too; the None default keeps an
        # empty input from raising StopIteration.
        gloveFile = iter(gloveFile)
        next(gloveFile, None)

    # Strip line terminators and skip blank lines: a blank line would
    # otherwise contribute an empty row and make the result ragged.
    return np.array([np.array(stripped.split(' ')[1:]).astype(float)
                     for stripped in (line.rstrip() for line in tqdm(gloveFile))
                     if stripped])
@eumesy
Copy link
Author

eumesy commented Dec 7, 2019

load words

def loadGloveVectors(gloveFile, skip_first_line=False):
    """Load only the embedding vectors from GloVe-style text lines.

    Each non-blank line is expected to hold a word followed by its vector
    components, separated by single spaces; the word is discarded.

    Args:
        gloveFile: An iterable of text lines, e.g. a file opened in text mode.
        skip_first_line: If True, discard the first line before parsing
            (presumably for files that start with a header line -- confirm
            against the data being loaded).

    Returns:
        numpy ndarray of shape (n, dim) when every line has the same number
        of components.
    """
    if skip_first_line:
        # iter() lets plain sequences work too; the None default keeps an
        # empty input from raising StopIteration.
        gloveFile = iter(gloveFile)
        next(gloveFile, None)

    # Strip line terminators and skip blank lines: a blank line would
    # otherwise contribute an empty row and make the result ragged.
    return np.array([np.array(stripped.split(' ')[1:]).astype(float)
                     for stripped in (line.rstrip() for line in tqdm(gloveFile))
                     if stripped])

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment