Skip to content

Instantly share code, notes, and snippets.

@mx-moth
Last active August 12, 2021 21:56
Show Gist options
  • Save mx-moth/4e283fea7143e64852dcc09b14a183f7 to your computer and use it in GitHub Desktop.
Save mx-moth/4e283fea7143e64852dcc09b14a183f7 to your computer and use it in GitHub Desktop.
Split a generator up into chunks of size `n`, without walking the list or keeping items in memory needlessly
from collections import deque
from itertools import chain, islice
class splitter:
    """Helper class for splitat.

    Holds one shared iterator and hands out two generators over it:
    ``prefix()`` yields at most ``count`` leading items and ``suffix()``
    yields everything after them. The two generators may be advanced in any
    order; prefix items are only buffered when the suffix is started before
    the prefix has been fully consumed.
    """

    def __init__(self, iterable, count):
        """
        Prepare to split ``iterable`` after its first ``count`` items.

        Raises ValueError if ``count`` is negative: a negative count would
        never be decremented to zero, so the prefix would have no end.
        """
        if count < 0:
            raise ValueError("count must be non-negative")
        self.iterator = iter(iterable)
        # Number of prefix items that may still be pulled from the iterator.
        self.count = count
        # Prefix items set aside by suffix(). A deque gives O(1) pops from
        # the left, where a list would shift every remaining item.
        self.queue = deque()

    def split(self):
        """Return the ``(prefix, suffix)`` pair of generators."""
        return self.prefix(), self.suffix()

    def prefix(self):
        """Yield the leading items, at most ``count`` of them."""
        while self.count or self.queue:
            # If there are items in the queue, then the suffix has been
            # accessed before the prefix was consumed. Yield the remaining
            # items from the queue and then clean up.
            if self.queue:
                queue = self.queue
                # Pop items off as they are yielded so each one can be
                # garbage collected as soon as the consumer is done with it.
                while queue:
                    yield queue.popleft()
                del queue
                del self.queue
                # Finish up, the prefix has been exhausted.
                return

            # If the queue is empty, consume from the iterator as normal.
            try:
                self.count -= 1
                yield next(self.iterator)
            except StopIteration:
                # The iterable held fewer items than requested.
                return

    def suffix(self):
        """Yield every item that follows the prefix."""
        # If the whole prefix has not been consumed yet, queue up the
        # remaining items for the prefix to yield later.
        if self.count:
            self.queue = deque(islice(self.iterator, self.count))
        # Yield the remaining items from the iterator. This has to stay a
        # generator: the queueing above must not run until the suffix is
        # actually advanced, otherwise the prefix would be consumed eagerly.
        yield from self.iterator
def splitat(iterable, count):
    """
    Divide an iterable into a (prefix, suffix) pair of lazy iterators.

    The prefix produces the first count items of the iterable and the suffix
    produces whatever comes after them. When the iterable holds fewer than
    count items, the prefix is simply shorter than count and the suffix is
    empty.
    """
    halves = splitter(iterable, count)
    return halves.split()
def gen_one(item):
    """
    Build a generator that produces exactly one value.

    The generator yields item once and is then exhausted.
    """
    yield item
def isempty(iterable):
    """
    Checks if an iterable is empty by peeking at its next element. Returns
    a tuple of (empty, iterator). The returned iterator should be used instead
    of the object that was passed in: if an iterator was passed in, it is now
    missing its first item, which the returned iterator supplies again.

    Any iterable is accepted, not only iterators; passing an iterator behaves
    as before because iter() returns an iterator unchanged.
    """
    iterator = iter(iterable)
    try:
        first = next(iterator)
    except StopIteration:
        return True, iterator

    head = gen_one(first)
    # Drop the local name so this frame does not keep the peeked item
    # referenced; from here on it is reachable only through ``head``.
    del first
    return False, chain(head, iterator)
def _lazy_chunk(head, iterator, size):
    """
    Build one chunk for chunksof.

    Returns a ``(chunk, detach)`` pair. ``chunk`` is a generator yielding
    ``head`` followed by up to ``size - 1`` further items read from
    ``iterator`` on demand. Calling ``detach()`` moves the items the chunk has
    not read yet out of ``iterator`` into a private buffer, so the iterator
    can be advanced past this chunk without the chunk losing anything.
    """
    # Items already taken from the iterator but not yet yielded.
    buffered = deque((head,))
    # Items this chunk may still take directly from the iterator.
    to_fetch = size - 1

    def detach():
        nonlocal to_fetch
        buffered.extend(islice(iterator, to_fetch))
        to_fetch = 0

    def items():
        nonlocal to_fetch
        while True:
            if buffered:
                # Pop before yielding so the buffer no longer references the
                # item once it has been handed to the consumer.
                yield buffered.popleft()
            elif to_fetch > 0:
                to_fetch -= 1
                try:
                    # Yield the expression directly rather than via a local
                    # name, so this frame does not keep the item alive while
                    # the generator is suspended.
                    yield next(iterator)
                except StopIteration:
                    return
            else:
                return

    return items(), detach


def chunksof(iterable, n):
    """
    Splits an iterable into chunks of length n. If the iterable does not hold
    a whole number of chunks, the last chunk will be shorter than n.

    Each chunk is a lazy iterator. Chunks may be consumed in any order and
    interleaved; asking for the next chunk buffers whatever the previous
    chunk has not read yet. All chunks read from one shared iterator, so the
    cost per item does not grow with the number of chunks produced.

    Raises ValueError when iteration starts if n is smaller than one, since
    such a size could only ever produce an endless run of empty chunks.
    """
    if n < 1:
        raise ValueError("n must be at least one")

    iterator = iter(iterable)
    exhausted = object()
    detach_previous = None

    while True:
        # Before reading past the previous chunk, let it set aside the items
        # it still owes its consumer.
        if detach_previous is not None:
            detach_previous()

        # Quit if the iterator is empty
        head = next(iterator, exhausted)
        if head is exhausted:
            return

        chunk, detach_previous = _lazy_chunk(head, iterator, n)
        # The chunk owns the peeked item now; do not keep it alive here.
        del head
        yield chunk
from chunks import chunksof, splitat
class VerboseInt:
    """Wrapper around a value that prints when it is created and deleted.

    Used by the demonstration below to make object lifetimes visible on
    standard output.
    """

    def __init__(self, i):
        """Announce the new instance, then store the wrapped value."""
        print("Just made", i)
        self.i = i

    def __del__(self):
        """Announce that this instance is being destroyed."""
        print("Deleting", self.i)

    def __eq__(self, other):
        """Compare the wrapped value against other."""
        wrapped = self.i
        return wrapped == other

    def __repr__(self):
        """Return the repr of the wrapped value."""
        wrapped = self.i
        return repr(wrapped)

    def __str__(self):
        """Return the string form of the wrapped value."""
        wrapped = self.i
        return str(wrapped)
def verbose_range(*args):
    """
    Generate VerboseInt instances for every number in range(*args).

    Instances are created one at a time, as the generator is advanced.
    """
    numbers = range(*args)
    for number in numbers:
        yield VerboseInt(number)
def test():
    """
    Exercise chunksof and splitat, printing progress along the way.

    Chunks are requested ahead of being read and are then read interleaved,
    to check that items come out in the right chunk and in the right order.
    """

    def expect(iterator, *values):
        # Pull one item per expected value and compare them in order.
        for value in values:
            assert next(iterator) == value

    def expect_exhausted(iterator):
        # The iterator must have nothing left to give.
        try:
            out = next(iterator)
        except StopIteration:
            return
        assert False, f"Should have raised StopIteration, got {out} instead"

    print("Making the chunks")
    chunks = chunksof(verbose_range(11), 3)

    print("Getting the first chunk")
    first_chunk = next(chunks)

    print("Testing first chunk item by item")
    expect(first_chunk, 0, 1, 2)
    expect_exhausted(first_chunk)

    print("Getting second chunk")
    second_chunk = next(chunks)

    print("Getting third chunk")
    third_chunk = next(chunks)

    print("Testing second and third chunk, item by item")
    expect(third_chunk, 6)
    expect(second_chunk, 3)
    expect(third_chunk, 7)
    expect(second_chunk, 4, 5)
    expect(third_chunk, 8)

    print("Done! What is left?")
    print(list(chunks))
    chunks.close()
    del chunks

    prefix, suffix = splitat(verbose_range(10), 5)

    print("Testing prefix")
    expect(prefix, 0, 1)

    print("Testing suffix")
    expect(suffix, 5, 6)

    print("Testing prefix again")
    expect(prefix, 2, 3, 4)
    expect_exhausted(prefix)

    expect(suffix, 7)


# Run the demonstration only when this file is executed as a script.
if __name__ == '__main__':
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment