Skip to content

Instantly share code, notes, and snippets.

@shpigunov
Created October 9, 2022 14:37
Show Gist options
  • Save shpigunov/ece4d9d09bfa3bfa40a8ebe94ba42d2b to your computer and use it in GitHub Desktop.
Save shpigunov/ece4d9d09bfa3bfa40a8ebe94ba42d2b to your computer and use it in GitHub Desktop.
Creating batches with Python
"""
This is a study on the fastest way to split an iterable (list) into batches of a set size.
Note that this study omits attempts at parallelization and does not use generators.
"""
from typing import List
from itertools import islice
def batch(iterable: List, batch_size: int) -> List[List]:
    """Split a list into consecutive slices of at most ``batch_size`` items.

    Args:
        iterable: The list to split. It is only sliced, never mutated.
        batch_size: Number of items per batch. The final batch holds
            whatever is left over and may be shorter.

    Returns:
        A list of new lists, in the original order. An empty input gives
        an empty list. A negative ``batch_size`` produces an empty range
        and therefore also an empty list.

    Raises:
        ValueError: If ``batch_size`` is 0 (raised by ``range``).
    """
    # Slicing clamps the stop index to the end of the list, so the last,
    # possibly shorter, batch needs no special case.
    return [
        iterable[start:start + batch_size]
        for start in range(0, len(iterable), batch_size)
    ]
def batch1(iterable: List, batch_size: int) -> List[List]:
    """Split a list into batches by popping items off a reversed copy.

    Args:
        iterable: The list to split. A reversed copy is consumed, so the
            caller's list is left untouched.
        batch_size: Number of items per batch; must be at least 1. The
            final batch may be shorter.

    Returns:
        A list of new lists, in the original order. An empty input gives
        an empty list.

    Raises:
        ValueError: If ``batch_size`` is less than 1. Without this check
            a non-positive size would never consume any items and the
            loop below would not terminate.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Reverse once so that pop() from the tail hands items back in their
    # original order.
    remaining = iterable[::-1]
    res = []
    while remaining:
        # Take a full batch, or whatever is left for the final one.
        take = min(batch_size, len(remaining))
        res.append([remaining.pop() for _ in range(take)])
    return res
def batch2(iterable: List, batch_size: int) -> List[List]:
    """Split a list into batches by taking ``islice`` windows in a loop.

    Args:
        iterable: The list to split; ``len()`` is taken of it, so it must
            be sized. It is not mutated.
        batch_size: Number of items per batch. The final batch may be
            shorter.

    Returns:
        A list of new lists, in the original order. An empty input gives
        an empty list.

    Raises:
        ValueError: If ``batch_size`` is 0 (raised by ``range``).
    """
    batches = []
    starts = range(0, len(iterable), batch_size)
    for start in starts:
        # islice cannot seek: each window is a fresh iterator that steps
        # over the first ``start`` items before yielding anything.
        window = islice(iterable, start, start + batch_size)
        batches.append(list(window))
    return batches
def batch3(iterable: List, batch_size: int) -> List[List]:
    """Split a list into batches with one comprehension over ``islice``.

    Args:
        iterable: The list to split; ``len()`` is taken of it, so it must
            be sized. It is not mutated.
        batch_size: Number of items per batch. The final batch may be
            shorter.

    Returns:
        A list of new lists, in the original order. An empty input gives
        an empty list.

    Raises:
        ValueError: If ``batch_size`` is 0 (raised by ``range``).
    """
    offsets = range(0, len(iterable), batch_size)
    # Each islice call starts a fresh iterator and steps over the first
    # ``offset`` items before yielding the batch.
    return [
        list(islice(iterable, offset, offset + batch_size))
        for offset in offsets
    ]
# Benchmark input: the integers 0 through 19999 in ascending order.
a = list(range(20000))
# Paste these into IPython to find out which is faster!
# %timeit batch(a, 443)
# %timeit batch1(a, 443)
# %timeit batch2(a, 443)
# %timeit batch3(a, 443)
# My results were:
# 78.1 µs ± 1.06 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
# 6.13 ms ± 60.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# 2.59 ms ± 20.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# 2.58 ms ± 20.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# %timeit batch3(a, 443)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment