@FanchenBao
Created December 17, 2021 17:38
Simple benchmark for multiprocessing in Python 2
#! /usr/bin/python
import multiprocessing as mp
import time
'''
A simple benchmark written while learning about multiprocessing in Python (2.7.10).
From this test, conducted on a MacBook Air running macOS 10.13.6, it is apparent that the
Pool class of multiprocessing yields the best runtime. My guess is that in the parallel()
function each new process is spawned manually via a for loop, whereas in parallel_pool()
the processes are managed internally by the library call to map_async(), which must be
better optimized.
Note that "sysctl -a | grep maxproc" returns
kern.maxproc: 1064
kern.maxprocperuid: 709
These are the absolute maximum numbers of processes the system allows before it breaks
down. In practice, getting the process count close to maxprocperuid can already bring the
system down.
The question is: if multiprocessing.cpu_count() is only 4 on this machine, how can I be
allowed 709 user processes? The answer is swapping. The machine can run at most 4
processes truly in parallel; beyond that, the OS uses scheduling and swapping to maintain
an illusion of concurrency. Once the number of processes grows too large, the system can
start thrashing, which is why an upper limit is set.
The Pool class lets one designate the number of worker processes. One can create as many
processes as one likes, but the principle is the same: on this machine only 4 processes
run at the same time, regardless of how many are created for the Pool (see the commented
sketch right after this docstring). In the benchmark, declaring more than four processes
for the Pool did not yield a better runtime.
Further reading about cpu_count and the number of processes to declare for a Pool:
https://stackoverflow.com/questions/20039659/python-multiprocessings-pool-process-limit/20039847
Output (all times in seconds):
iteration #1
serial: 0.238783836365
parall: 0.244979858398
pool: 0.0794720649719
iteration #2
serial: 0.5089199543
parall: 0.268113136292
pool: 0.0120952129364
iteration #4
serial: 1.13017201424
parall: 0.504716873169
pool: 0.0117599964142
iteration #8
serial: 2.34234189987
parall: 0.991414070129
pool: 0.0127170085907
iteration #16
serial: 4.68354511261
parall: 1.9866399765
pool: 0.0138230323792
iteration #32
serial: 9.44051289558
parall: 3.97846603394
pool: 0.0130209922791
iteration #64
serial: 19.0180068016
parall: 7.95093202591
pool: 0.0137310028076
iteration #128
serial: 38.1457829475
parall: 15.9482960701
pool: 0.0195331573486
'''
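# Hedged sketch (not part of the original benchmark): by default mp.Pool() creates
# mp.cpu_count() worker processes; passing processes=N pins the worker count explicitly,
# e.g.
#     pool = mp.Pool(processes=8)  # 8 workers, but only 4 can run truly in parallel here
# On this machine mp.cpu_count() returns 4, which is why a larger N gave no extra speedup.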
NUM_RANGE = 10000000
# the busy function
def busy(*args):
    for i in xrange(NUM_RANGE):
        a = 2 * 2
def parallel(times):
    ''' Set up processes to run the task in parallel. '''
    processes = [mp.Process(target=busy, args=()) for x in xrange(times)]
    # Run the processes
    for p in processes:
        p.start()
    # Wait for the processes to complete
    for p in processes:
        p.join()
def serial(times):
    ''' Set up function to run the task in serial. '''
    for i in xrange(times):
        busy()
def parallel_pool(times):
    ''' Use apply_async or map_async to make setting up multiprocessing simpler. '''
    pool = mp.Pool()
    # res = [pool.apply_async(busy, args=(x,)) for x in xrange(times)]
    res = pool.map_async(busy, xrange(times))  # to get the return value from map_async, call res.get()
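    # map_async() returns an AsyncResult immediately, without waiting for the workers;
    # res.get() blocks until every busy() task has finished, and pool.close() followed
    # by pool.join() shuts the worker processes down cleanly.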
iterations = [1, 2, 4, 8, 16, 32, 64, 128] # total number of iterations/processes to be executed
for it in iterations:
    print("iteration #" + str(it))
    beg = time.time()
    serial(it)
    print("serial:\t" + str(time.time() - beg))
    beg = time.time()
    parallel(it)
    print("parall:\t" + str(time.time() - beg))
    beg = time.time()
    parallel_pool(it)
    print("pool:\t" + str(time.time() - beg))