Skip to content

Instantly share code, notes, and snippets.

@odashi
Created March 23, 2016 13:08
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save odashi/6c78e65f4b78adfa3766 to your computer and use it in GitHub Desktop.
Save odashi/6c78e65f4b78adfa3766 to your computer and use it in GitHub Desktop.
Frequently-used batch generators for my NLP study.
import builtins
import random
def word_list(filename, encoding=None):
    """Yield each line of a text file as a list of whitespace-separated tokens.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            keeps the platform's default encoding; pass e.g. ``"utf-8"``
            when the encoding of the file is known.

    Yields:
        One list of strings per line, produced by ``str.split()`` with no
        arguments, so a blank line yields an empty list.
    """
    # The file stays open only while the generator is being consumed; the
    # ``with`` block closes it once iteration finishes or the generator is
    # closed.
    with open(filename, encoding=encoding) as fp:
        for line in fp:
            yield line.split()
def batch(generator, size):
    """Group the items of an iterable into lists of ``size`` items.

    Args:
        generator: Any iterable; it is consumed lazily, one item at a time.
        size: Number of items per yielded list. Must be at least 1.

    Yields:
        Lists of consecutive items in input order. The last list may be
        shorter when the input runs out first; nothing is yielded for an
        empty input.

    Raises:
        ValueError: If ``size`` is less than 1. Because this is a generator
            function, the error surfaces when iteration starts.
    """
    # A size below 1 can never equal len(chunk) after an append, so the
    # whole input would silently pile up into a single list.
    if size < 1:
        raise ValueError("size must be at least 1, got {!r}".format(size))
    chunk = []
    for item in generator:
        chunk.append(item)
        if len(chunk) == size:
            yield chunk
            # Start a fresh list so the one just yielded is not mutated.
            chunk = []
    if chunk:
        yield chunk
def shuffled(generator, size):
    """Yield the items of ``generator`` shuffled within consecutive chunks.

    The input is split with ``batch(generator, size)``; each chunk is
    shuffled in place by ``random.shuffle`` and then emitted item by item,
    so an item is only reordered relative to others in the same chunk.
    """
    for chunk in batch(generator, size):
        random.shuffle(chunk)
        yield from chunk
def sorted(generator, size, key):
    """Yield the items of ``generator`` sorted within consecutive chunks.

    The input is split with ``batch(generator, size)`` and each chunk is
    ordered by ``key`` before its items are emitted one by one.

    This function shares its name with the builtin, which is why the
    builtin is reached through ``builtins.sorted`` below.
    """
    for chunk in batch(generator, size):
        yield from builtins.sorted(chunk, key=key)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment