Skip to content

Instantly share code, notes, and snippets.

@chssch
Created April 24, 2017 21:19
Show Gist options
  • Save chssch/3346e94adf3e1f2d439c278fc0642878 to your computer and use it in GitHub Desktop.
Save chssch/3346e94adf3e1f2d439c278fc0642878 to your computer and use it in GitHub Desktop.
def load_data(filename):
    """Load pickled, serialized spaCy docs and group them into pairs.

    The pickle at ``filename`` is expected to hold an iterable of
    serialized documents (each item is fed to ``Doc.from_bytes``). Every
    document is rebuilt with its stop-word tokens removed, and consecutive
    documents (records 0 and 1, 2 and 3, ...) are combined into a pair.

    Args:
        filename: Path to the pickle file to read.

    Returns:
        A list of ``(second_doc, first_doc)`` tuples, i.e. within each pair
        the odd-indexed record comes first. If the input holds an odd
        number of records, the final unpaired record is dropped.

    Side effects:
        Prints the record index every 100 records as a progress indicator.
    """
    # NOTE(security): unpickling can execute arbitrary code -- only load
    # files from a trusted source.
    # The context manager guarantees the file handle is closed even if
    # unpickling raises.
    with open(filename, "rb") as handle:
        serialized_docs = cPickle.load(handle)

    pairs = []
    previous = None
    for index, raw in enumerate(serialized_docs):
        parsed = spacy.tokens.doc.Doc(nlp.vocab)
        parsed.from_bytes(raw)

        # Rebuild the document from the surviving token texts so that stop
        # words are absent from the resulting Doc altogether.
        filtered = spacy.tokens.doc.Doc(
            nlp.vocab,
            words=[unicode(token) for token in parsed if not token.is_stop],
        )

        if index % 2 == 1:
            # Odd record: completes the pair started by the previous record.
            pairs.append((filtered, previous))
        else:
            # Even record: hold on to it until its partner arrives.
            previous = filtered

        if index % 100 == 0:
            print(index)

    return pairs
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment