Last active
August 30, 2017 08:20
-
-
Save reutsharabani/ea98289f479d9b5abf4b86a2750eb623 to your computer and use it in GitHub Desktop.
Python chunks function with overlaps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def skip(iterator, n):
    """
    Advance ``iterator`` by ``n`` items and hand the same iterator back.

    Adapted from the "consume" recipe in the itertools documentation:
    https://docs.python.org/3/library/itertools.html#itertools-recipes

    :param iterator: iterator to advance; it is consumed in place
    :param n: number of items to discard, or ``None`` to exhaust the
        iterator completely
    :return: the very same ``iterator`` object, now positioned after the
        skipped items (or exhausted, if it held fewer than ``n`` items)
    """
    # Imported locally so the function works even though the module has no
    # top-level import block.
    from collections import deque
    from itertools import islice

    if n is None:
        # A zero-length deque swallows every item without storing any.
        deque(iterator, maxlen=0)
    else:
        # islice(iterator, n, n) is an empty slice that starts at position n;
        # asking it for one item pulls exactly n items from ``iterator`` (or
        # fewer if it runs out), and the ``None`` default absorbs the
        # resulting StopIteration.
        next(islice(iterator, n, n), None)
    return iterator
def chunks(iterable, n, step=None, trim=False):
    """
    Lazily yield windows ("chunks") of up to ``n`` consecutive items.

    A new window starts every ``step`` items, so ``step < n`` produces
    overlapping windows and ``step > n`` leaves gaps between them.  Each
    window is yielded as a tuple.

    :param iterable: any iterable; it is traversed once
    :param int n: chunk size
    :param int step: distance between the starts of consecutive chunks;
        ``None`` (or ``0``) means "use ``n``", i.e. adjacent chunks
    :param bool trim: when true, drop trailing chunks shorter than ``n``;
        otherwise they are yielded with only the items that exist
    :return: generator of tuples holding at most ``n`` items each
    """
    # Imported locally so the function works even though the module has no
    # top-level import block.
    from itertools import islice, tee, zip_longest

    # Fresh per-call sentinel: it cannot collide with any value that the
    # caller's iterable may contain (including None).
    missing = object()
    stride = step or n

    # One independent copy of the input per position inside a window.
    branches = tee(iter(iterable), n)
    # Copy number ``offset`` starts ``offset`` items in and then advances by
    # ``stride``, so the k-th item of every copy together form window k.
    strided = [
        islice(branch, offset, None, stride)
        for offset, branch in enumerate(branches)
    ]

    for window in zip_longest(*strided, fillvalue=missing):
        # Copies with a larger offset run out first, so padding can only
        # form a suffix: checking the last slot detects a short window.
        if window[-1] is missing:
            if trim:
                continue
            window = tuple(item for item in window if item is not missing)
        yield window
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment