Created
October 9, 2022 14:37
-
-
Save shpigunov/ece4d9d09bfa3bfa40a8ebe94ba42d2b to your computer and use it in GitHub Desktop.
Creating batches with Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This is a study on the fastest way to split an iteralbe (list) into batches of a set size. | |
Note that this study omits attempts parallelization and does not use generators. | |
""" | |
from typing import List | |
from itertools import islice | |
def batch(iterable: List, batch_size: int) -> List[List]:
    """Split a list into consecutive batches using plain slicing.

    Args:
        iterable: The list to split. It is not modified.
        batch_size: Maximum number of items per batch; expected to be
            a positive integer.

    Returns:
        A list of slices of ``iterable`` in their original order. Every
        batch holds ``batch_size`` items except possibly the last one,
        which holds whatever remains. An empty input gives an empty list.

    Raises:
        ValueError: If ``batch_size`` is zero (raised by ``range``).
    """
    # Slicing clamps an out-of-range end index to the sequence length, so
    # the final, shorter batch needs no special-casing.
    return [
        iterable[start:start + batch_size]
        for start in range(0, len(iterable), batch_size)
    ]
def batch1(iterable: List, batch_size: int) -> List[List]:
    """Split a list into batches by popping items off a reversed copy.

    Args:
        iterable: The list to split. Only a reversed copy is consumed, so
            the caller's list is left untouched.
        batch_size: Maximum number of items per batch; must be at least 1.

    Returns:
        A list of batches in the original item order. Every batch holds
        ``batch_size`` items except possibly the last one, which holds
        whatever remains. An empty input gives an empty list.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    if batch_size < 1:
        # A non-positive size would never pop anything, so the loop below
        # would spin forever while appending empty batches.
        raise ValueError("batch_size must be a positive integer")

    # Reverse once so that pop() from the tail hands items back in their
    # original order.
    remaining = iterable[::-1]
    res = []
    while remaining:
        # Never request more items than are left, so pop() cannot fail.
        take = min(batch_size, len(remaining))
        res.append([remaining.pop() for _ in range(take)])
    return res
def batch2(iterable: List, batch_size: int) -> List[List]:
    """Split a list into batches with ``itertools.islice`` in a loop.

    Args:
        iterable: The list to split. It is not modified.
        batch_size: Maximum number of items per batch; expected to be
            a positive integer.

    Returns:
        A list of batches in the original item order. Every batch holds
        ``batch_size`` items except possibly the last one, which holds
        whatever remains. An empty input gives an empty list.

    Raises:
        ValueError: If ``batch_size`` is zero (raised by ``range``).
    """
    res = []
    for start in range(0, len(iterable), batch_size):
        # islice stops early at the end of the list, so the last batch is
        # simply shorter; list() materializes it without a copy loop.
        res.append(list(islice(iterable, start, start + batch_size)))
    return res
def batch3(iterable: List, batch_size: int) -> List[List]:
    """Split a list into batches with ``itertools.islice`` in one expression.

    Args:
        iterable: The list to split. It is not modified.
        batch_size: Maximum number of items per batch; expected to be
            a positive integer.

    Returns:
        A list of batches in the original item order. Every batch holds
        ``batch_size`` items except possibly the last one, which holds
        whatever remains. An empty input gives an empty list.

    Raises:
        ValueError: If ``batch_size`` is zero (raised by ``range``).
    """
    # islice stops early at the end of the list, so the last batch is
    # simply shorter; list() materializes each window directly.
    return [
        list(islice(iterable, start, start + batch_size))
        for start in range(0, len(iterable), batch_size)
    ]
# Sample input for the timing runs below: the integers 0..19999.
a = list(range(20000))
# Paste these into IPython to find out which is faster! | |
# %timeit batch(a, 443) | |
# %timeit batch1(a, 443) | |
# %timeit batch2(a, 443) | |
# %timeit batch3(a, 443)
# My results were: | |
# 78.1 µs ± 1.06 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each) | |
# 6.13 ms ± 60.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) | |
# 2.59 ms ± 20.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) | |
# 2.58 ms ± 20.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) | |
# %timeit batch3(a, 443) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment