Skip to content

Instantly share code, notes, and snippets.

@nicolasdespres
Created March 8, 2017 08:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nicolasdespres/e07aa951ba62aeaea60e740238571664 to your computer and use it in GitHub Desktop.
Save nicolasdespres/e07aa951ba62aeaea60e740238571664 to your computer and use it in GitHub Desktop.
Iterate over an iterable whose items are shuffled
class shuffle_iter(Iterator):
    """Iterate over the items of an iterable in a locally shuffled order.

    Items are read from the source in chunks of at most `capacity` items.
    Each chunk is shuffled with `random.shuffle` and then yielded before the
    next chunk is read, so an item is only ever permuted with the other
    members of its own chunk. The larger the capacity, the more thorough the
    shuffling (and the more memory used).

    Args:
        iterable: the source of data to shuffle.
        capacity: the size of the internal buffer used for shuffling.

    Raises:
        TypeError: if `capacity` is not an int.
        ValueError: if `capacity` is not strictly positive.
    """

    def __init__(self, iterable, capacity=1000):
        if not isinstance(capacity, int):
            raise TypeError("capacity must be int, not {}"
                            .format(type(capacity).__name__))
        if capacity <= 0:
            raise ValueError("capacity must be positive, not {}"
                             .format(capacity))
        # Keep the original iterable around so that __len__ can query it.
        self._iterable = iterable
        self._it = iter(iterable)
        self._capa = capacity
        self.clear()

    @property
    def capacity(self):
        """Size of the internal shuffling buffer."""
        return self._capa

    def __len__(self):
        """Return the length of the source iterable.

        Works only if the source `iterable` has a len(). The value is the
        total number of source items; it does not decrease as items are
        consumed.
        """
        return len(self._iterable)

    def _refill(self):
        """Replace the buffer with the next shuffled chunk of the source.

        The buffer is left empty when the source is exhausted.
        """
        # Function-scope import keeps the class self-contained: it needs no
        # external chunking helper to be in scope.
        from itertools import islice

        self._buf = list(islice(self._it, self._capa))
        random.shuffle(self._buf)
        self._i = 0

    def _take(self, count):
        """Return up to `count` items from the current buffer (no refill)."""
        objects = self._buf[self._i:self._i + count]
        self._i += len(objects)
        return objects

    def _take_upto(self, count):
        """Return up to `count` items, refilling the buffer as needed.

        Fewer than `count` items are returned when the source runs out
        part-way through the request.

        Raises:
            StopIteration: if no item at all could be taken.
        """
        objects = []
        while count > 0:
            taken = self._take(count)
            objects.extend(taken)
            count -= len(taken)
            if count > 0:
                # Current buffer is drained but more items are wanted.
                self._refill()
                if not self._buf:
                    if objects:
                        # Source exhausted: hand back what was collected
                        # instead of silently dropping it.
                        return objects
                    raise StopIteration
        return objects

    def __next__(self):
        """Return the next shuffled item, or raise StopIteration."""
        return self._take_upto(1)[0]

    def clear(self):
        """Clear the internal memory.

        Buffered items that have not been yielded yet are discarded; they
        were already consumed from the source iterator.
        """
        self._buf = []
        self._i = 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment