Created
March 8, 2017 08:14
-
-
Save nicolasdespres/e07aa951ba62aeaea60e740238571664 to your computer and use it in GitHub Desktop.
Iterate over an iterable whose items are shuffled
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class shuffle_iter(Iterator):
    """Iterate over the items of an iterable in a locally shuffled order.

    Items are pulled from the source in chunks of at most `capacity` items.
    Each chunk is shuffled with `random.shuffle` and then handed out one
    item at a time, so shuffling only happens *within* a chunk. The more
    capacity, the more shuffling (and the more items held in memory).

    Args:
        `iterable`: the source of data to shuffle
        `capacity`: the size of the internal buffer used for shuffling

    Raises:
        TypeError: if `capacity` is not an int.
        ValueError: if `capacity` is not strictly positive.
    """

    def __init__(self, iterable, capacity=1000):
        if not isinstance(capacity, int):
            raise TypeError("capacity must be int, not {}"
                            .format(type(capacity).__name__))
        if capacity <= 0:
            raise ValueError("capacity must be positive, not {}"
                             .format(capacity))
        # The original object is kept only so that __len__ can query it;
        # all iteration goes through the single iterator created here.
        self._iterable = iterable
        self._it = iter(iterable)
        self._capa = capacity
        self.clear()

    @property
    def capacity(self):
        """Maximum number of items buffered (and shuffled) at once."""
        return self._capa

    def __len__(self):
        """Return the total number of iteration.

        This is `len()` of the source `iterable`, i.e. the total item count,
        not the number of items still to be yielded. Works only if the
        source `iterable` has a len(); otherwise `len()` raises TypeError.
        """
        return len(self._iterable)

    def _refill(self):
        """Replace the buffer with the next shuffled chunk of the source.

        The buffer ends up empty when the source iterator is exhausted.
        """
        # Local import keeps this class self-contained: it needs neither a
        # module-level import nor an external chunking helper.
        from itertools import islice

        self._buf = list(islice(self._it, self._capa))
        random.shuffle(self._buf)
        self._i = 0

    def _take(self, count):
        """Return up to `count` not-yet-consumed items of the current buffer.

        Fewer items (possibly none) are returned when the buffer runs out;
        this method never refills it.
        """
        objects = self._buf[self._i:self._i + count]
        # The cursor may overshoot the buffer end; slicing tolerates that and
        # _refill() resets it to 0.
        self._i += count
        return objects

    def _take_upto(self, count):
        """Return a list of up to `count` items, refilling as needed.

        Fewer than `count` items are returned when the source runs out
        midway.

        Raises:
            StopIteration: if `count` is positive and no item at all is
                left in either the buffer or the source.
        """
        objects = []
        while count > 0:
            taken = self._take(count)
            objects.extend(taken)
            count -= len(taken)
            if count > 0:
                # The buffer ran dry before the request was satisfied.
                self._refill()
                if not self._buf:
                    if objects:
                        # Source exhausted: hand back what was gathered
                        # instead of discarding it.
                        break
                    raise StopIteration
        return objects

    def __next__(self):
        """Return the next shuffled item, or raise StopIteration at the end."""
        return self._take_upto(1)[0]

    def clear(self):
        """Clear the internal memory.

        Items currently buffered are discarded; the source iterator itself
        is left where it is.
        """
        self._buf = []
        self._i = 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment