Will it thread?
"""
Will it thread?
A simple demonstration of Amdahl's law, which quantifies the amount of speedup
that can be obtained by parallelizing a program when only some fraction of the
code is parallelizable (http://en.wikipedia.org/wiki/Amdahl%27s_law).
This is of interest in python because the python interpreter has a global
interpreter lock (GIL) which means that even "multi-threaded" python code is
effectively single-threaded and cannot take advantage of multiple cores.
Numpy code is an exception because many numpy routines that call into C
libraries actually release the GIL, and so we might be able to benefit from
multithreading, depending on the actual fraction of time spent in C code.
We test this for a couple of different numpy functions, convolve and fft, which
turn out to behave rather differently from each other and, in the case of fft,
differently based on the arguments passed in as well.
"""
from functools import partial
import time
import threading
import matplotlib.pyplot as plt
import numpy as np
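

# A minimal sketch of Amdahl's law in the forward direction (assumed helper,
# not part of the original gist): for a parallelizable fraction p run on n
# threads, the predicted speedup is 1 / ((1 - p) + p / n). For example,
# amdahl_speedup(0.9, 4) is about 3.08, and even with unlimited threads the
# speedup is capped at 1 / (1 - p) = 10.
def amdahl_speedup(p, n):
    """Predicted speedup for parallelizable fraction p on n threads."""
    return 1.0 / ((1.0 - p) + p / n)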


def seq(n, f, *args, **kw):
    """Time to execute a function n times in sequence."""
    start = time.time()
    for _ in range(n):
        f(*args, **kw)
    end = time.time()
    return end - start


def par(n, f, *args, **kw):
    """Time to execute a function n times in parallel, each in its own thread."""
    threads = [threading.Thread(target=f, args=args, kwargs=kw)
               for _ in range(n)]
    start = time.time()
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    end = time.time()
    return end - start


def par_speedup(n, f, *args, **kw):
    """Speedup of parallel (threaded) versus sequential execution."""
    t_par = par(n, f, *args, **kw)
    t_seq = seq(n, f, *args, **kw)
    return t_seq / t_par
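

# Sanity-check sketch (assumed example, not part of the original gist): for
# pure-Python work that holds the GIL the whole time, threads give essentially
# no speedup, so par_speedup should come out close to 1.0.
def gil_bound_work():
    """Pure-Python busy work that never releases the GIL."""
    return sum(i * i for i in range(10**6))

# Uncomment to check: with 4 threads the speedup should be roughly 1.0.
# print(par_speedup(4, gil_bound_work))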


def parallelizable_fraction(n, speedup):
    """Determine the fraction of code that is parallelizable using Amdahl's law.

    Amdahl's law gives speedup = 1 / ((1 - p) + p / n) for parallelizable
    fraction p on n threads; solving for p gives the expression below. Given
    the number of parallel threads n and the measured speedup, returns the
    effective fraction of the code that can be parallelized.
    """
    return (1.0 / speedup - 1) / (1.0 / n - 1)
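

# Round-trip sanity check (illustrative, not part of the original gist):
# feeding a known Amdahl speedup back through parallelizable_fraction should
# recover the fraction p. Note that for n == 1 the expression is 0/0 and
# yields nan.
_p, _n = 0.9, 4
_speedup = 1.0 / ((1 - _p) + _p / _n)  # forward Amdahl's law
assert abs(parallelizable_fraction(_n, _speedup) - _p) < 1e-9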


ns = np.arange(1, 11)

# Note: these are pretty slow; we want to maximize the time spent in C code
test_cases = {
    'convolve': partial(np.convolve, np.arange(100000), np.arange(10000)),
    'fft': partial(np.fft.fft, np.arange(10000000)),
    'fft_pow2': partial(np.fft.fft, np.arange(2**23)),
}
test_names = sorted(test_cases.keys())
test_results = {name: np.array([par_speedup(n, f) for n in ns])
                for name, f in test_cases.items()}
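
# Interpretation note (not in the original gist): per Amdahl's law the speedup
# curves should flatten toward 1 / (1 - p) as the thread count grows, and the
# second subplot shows the implied fraction p for each workload. The exact
# numbers will vary with the machine, numpy build, and core count.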

plt.subplot(211)
for name in test_names:
    y = test_results[name]
    plt.plot(ns, y, '.-', label=name)
plt.ylabel('speedup')
plt.legend()

plt.subplot(212)
for name in test_names:
    y = parallelizable_fraction(ns, test_results[name])
    plt.plot(ns, y, '.-', label=name)
plt.xlabel('parallel threads')
plt.ylabel('parallelizable fraction')
plt.legend()

plt.show()
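
# To run (assumed usage, filename is hypothetical): save the script as
# will_it_thread.py and execute it with
#     python will_it_thread.py
# Absolute timings and speedups depend on the number of physical cores.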