@adithyabsk
Last active July 1, 2018 21:31
Benchmarking multiprocessing.Pool.map: with an initializer, without an initializer, and with functools.partial
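The script below (test_pool_map.py) compares three ways of getting a read-only aux_data list into multiprocessing.Pool workers: without_initializer packs aux_data into every task tuple, with_partial binds it to the worker via functools.partial, and with_initializer hands it to each worker process once through the Pool initializer. Recorded output from two runs: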
python3.6 test_pool_map.py without_initializer
aux_data 1,000 ints : 0.126059 secs per iteration 9999000
aux_data 10,000 ints : 0.134954 secs per iteration 9999000
aux_data 100,000 ints : 0.144767 secs per iteration 9999000
aux_data 1,000,000 ints : 0.238309 secs per iteration 9999000
aux_data 10,000,000 ints : 1.329838 secs per iteration 9999000
python3.6 test_pool_map.py with_partial
aux_data 1,000 ints : 0.134125 secs per iteration 9999000
aux_data 10,000 ints : 0.144146 secs per iteration 9999000
aux_data 100,000 ints : 0.145000 secs per iteration 9999000
aux_data 1,000,000 ints : 0.242282 secs per iteration 9999000
aux_data 10,000,000 ints : 1.291135 secs per iteration 9999000
python3.6 test_pool_map.py with_initializer
aux_data 1,000 ints : 0.144102 secs per iteration 9999000
aux_data 10,000 ints : 0.134644 secs per iteration 9999000
aux_data 100,000 ints : 0.143082 secs per iteration 9999000
aux_data 1,000,000 ints : 0.145427 secs per iteration 9999000
aux_data 10,000,000 ints : 0.150022 secs per iteration 9999000
python3.6 test_pool_map.py without_initializer
aux_data 1,000 ints : 0.136618 secs per iteration 9999000
aux_data 10,000 ints : 0.134275 secs per iteration 9999000
aux_data 100,000 ints : 0.164732 secs per iteration 9999000
aux_data 1,000,000 ints : 0.407220 secs per iteration 9999000
aux_data 10,000,000 ints : 4.163233 secs per iteration 9999000
python3.6 test_pool_map.py with_partial
aux_data 1,000 ints : 0.145331 secs per iteration 9999000
aux_data 10,000 ints : 0.154624 secs per iteration 9999000
aux_data 100,000 ints : 0.166990 secs per iteration 9999000
aux_data 1,000,000 ints : 0.428308 secs per iteration 9999000
aux_data 10,000,000 ints : 4.118038 secs per iteration 9999000
python3.6 test_pool_map.py with_initializer
aux_data 1,000 ints : 0.144303 secs per iteration 9999000
aux_data 10,000 ints : 0.143502 secs per iteration 9999000
aux_data 100,000 ints : 0.144219 secs per iteration 9999000
aux_data 1,000,000 ints : 0.145476 secs per iteration 9999000
aux_data 10,000,000 ints : 0.149997 secs per iteration 9999000
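Reading the numbers: in both recorded runs the with_initializer times stay essentially flat (about 0.14 to 0.15 secs) as aux_data grows from 1,000 to 10,000,000 ints, while without_initializer and with_partial degrade by an order of magnitude (up to roughly 1.3 and 4.2 secs in the two runs). The reason is serialization: Pool.map pickles the task payloads, including aux_data (whether passed directly in each tuple or bound inside the partial), on every call, whereas the initializer ships aux_data to each worker exactly once at pool start-up. A minimal sketch of that serialization cost, not part of the original gist, using the same sizes:

import pickle
import time

for exp in range(3, 8):
    aux_data = [exp] * 10 ** exp
    start = time.time()
    blob = pickle.dumps(aux_data)  # roughly what Pool.map does per task batch
    elapsed = time.time() - start
    print("aux_data {0:>10,} ints : {1:>12,} bytes, pickled in {2:.6f} secs"
          .format(len(aux_data), len(blob), elapsed))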
# Original code from
# https://medium.com/@rvprasad/data-and-chunk-sizes-matter-when-using-multiprocessing-pool-map-in-python-5023c96875ef
import functools
import multiprocessing
import time

CPU_COUNT = 4
CHUNK_SIZE = None


def worker(varying_data, aux_data):
    # aux_data is deliberately unused: the benchmark only measures what it
    # costs to get it into the worker process.
    t = 0
    for j in range(1, 10000):
        t += varying_data
    return t


aux_data = None


def initializer(init_data):
    # Runs once in each worker process at pool start-up and stashes the
    # auxiliary data in a module-level global.
    global aux_data
    aux_data = init_data


def with_initializer_worker_wrapper(varying_data):
    return worker(varying_data, aux_data)


def with_initializer():
    iterations = 10
    for exp in range(3, 8):
        start_time = time.time()
        aux_data = [exp] * pow(10, exp)
        # aux_data is pickled once per worker, via the initializer.
        pool = multiprocessing.Pool(CPU_COUNT, initializer, (aux_data,))
        data = [1 for x in range(1, 1001)]
        tmp = 0
        for _ in range(iterations):
            tmp = sum(pool.map(with_initializer_worker_wrapper, data,
                               chunksize=CHUNK_SIZE))
        pool.close()
        pool.join()
        end_time = time.time()
        secs_per_iteration = (end_time - start_time) / iterations
        print("aux_data {0:>10,} ints : {1:>6.6f} secs per iteration {2}"
              .format(len(aux_data), secs_per_iteration, tmp))


def with_partial():
    iterations = 10
    for exp in range(3, 8):
        start_time = time.time()
        aux_data = [exp] * pow(10, exp)
        pool = multiprocessing.Pool(CPU_COUNT)
        data = [1 for x in range(1, 1001)]
        tmp = 0
        # The partial object, and the aux_data bound inside it, is pickled
        # and shipped to the workers on every map call.
        pworker = functools.partial(worker, aux_data=aux_data)
        for _ in range(iterations):
            tmp = sum(pool.map(pworker, data, chunksize=CHUNK_SIZE))
        pool.close()
        pool.join()
        end_time = time.time()
        secs_per_iteration = (end_time - start_time) / iterations
        print("aux_data {0:>10,} ints : {1:>6.6f} secs per iteration {2}"
              .format(len(aux_data), secs_per_iteration, tmp))


def without_initializer_worker_wrapper(data):
    return worker(*data)


def without_initializer():
    iterations = 10
    for exp in range(3, 8):
        start_time = time.time()
        aux_data = [exp] * pow(10, exp)
        pool = multiprocessing.Pool(CPU_COUNT)
        # Every task tuple carries a reference to aux_data, so it is pickled
        # with the task payload on every map call.
        data = [(1, aux_data) for x in range(1, 1001)]
        tmp = 0
        for _ in range(iterations):
            tmp = sum(pool.map(without_initializer_worker_wrapper, data,
                               chunksize=CHUNK_SIZE))
        pool.close()
        pool.join()
        end_time = time.time()
        secs_per_iteration = (end_time - start_time) / iterations
        print("aux_data {0:>10,} ints : {1:>6.6f} secs per iteration {2}"
              .format(len(aux_data), secs_per_iteration, tmp))


if __name__ == '__main__':
    # The printed command lines are labels only: sys.argv is not parsed and
    # all three benchmarks run in sequence.
    print('python3.6 test_pool_map.py without_initializer')
    without_initializer()
    print('python3.6 test_pool_map.py with_partial')
    with_partial()
    print('python3.6 test_pool_map.py with_initializer')
    with_initializer()
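For comparison, the same initializer pattern also exists in concurrent.futures: ProcessPoolExecutor gained initializer/initargs parameters in Python 3.7. The sketch below is not part of the original gist (and would not run on the python3.6 used above); it simply restates the with_initializer variant with hypothetical names:

from concurrent.futures import ProcessPoolExecutor

_aux_data = None


def _initializer(init_data):
    # Runs once per worker process, mirroring the Pool initializer above.
    global _aux_data
    _aux_data = init_data


def _worker(varying_data):
    t = 0
    for _ in range(1, 10000):
        t += varying_data
    return t


if __name__ == '__main__':
    aux_data = [7] * 10 ** 6
    with ProcessPoolExecutor(max_workers=4, initializer=_initializer,
                             initargs=(aux_data,)) as executor:
        print(sum(executor.map(_worker, [1] * 1000)))  # prints 9999000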