Skip to content

Instantly share code, notes, and snippets.

@npow
Created July 10, 2015 20:26
Show Gist options
  • Save npow/2e05ea9835615d94f9a0 to your computer and use it in GitHub Desktop.
Save npow/2e05ea9835615d94f9a0 to your computer and use it in GitHub Desktop.
def load_bin_vec(fname, vocab):
    """
    Load word vectors from a word2vec binary file (e.g. Google/Mikolov).

    The file begins with a text header line ``"<vocab_size> <layer1_size>"``
    followed by ``vocab_size`` records, each consisting of the word, a single
    space, and ``layer1_size`` raw float32 values. Newline bytes encountered
    while reading a word (the record separators) are skipped.

    Args:
        fname: Path of the binary word2vec file.
        vocab: Collection of words to keep (anything supporting ``in``).
            Words from the file are lower-cased before the lookup, so the
            entries of ``vocab`` should be lower-case too.

    Returns:
        dict mapping each lower-cased word that is in ``vocab`` to a 1-D
        float32 numpy array of length ``layer1_size``. If several words in
        the file lower-case to the same key, the last one wins.

    Raises:
        EOFError: If the file ends before ``vocab_size`` complete records
            have been read (instead of looping forever on a truncated file).
    """
    word_vecs = {}
    with open(fname, "rb") as f:
        header = f.readline()
        vocab_size, layer1_size = map(int, header.split())
        binary_len = np.dtype('float32').itemsize * layer1_size
        for _ in range(vocab_size):
            # The file is opened in binary mode, so the word is collected as
            # bytes and compared against byte literals.
            chars = []
            while True:
                ch = f.read(1)
                if not ch:
                    raise EOFError(
                        "unexpected end of file while reading a word from %r"
                        % (fname,)
                    )
                if ch == b' ':
                    break
                if ch != b'\n':
                    chars.append(ch)
            word = b''.join(chars).decode("utf-8", errors="replace").lower()

            # Always consume the vector bytes so the stream stays aligned on
            # the next record, whether or not the word is wanted.
            raw = f.read(binary_len)
            if len(raw) != binary_len:
                raise EOFError(
                    "unexpected end of file while reading the vector for %r"
                    % (word,)
                )
            if word in vocab:
                # frombuffer returns a read-only view of `raw`; copy so the
                # caller gets an independent, writable array.
                word_vecs[word] = np.frombuffer(raw, dtype='float32').copy()
    return word_vecs
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment