Skip to content

Instantly share code, notes, and snippets.

@jcrist
Created July 26, 2017 22:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jcrist/739001882738b1bde1125feb4ac3899e to your computer and use it in GitHub Desktop.
Save jcrist/739001882738b1bde1125feb4ac3899e to your computer and use it in GitHub Desktop.
Benchmark Black-Scholes
import argparse
from timeit import default_timer
from collections import namedtuple
import numpy as np
import dask.array as da
from dask.threaded import get
from scipy.special import erf
# Bounds of the uniform distribution that bench() samples prices from.
S0L, S0H = 10.0, 50.0

# Bounds of the uniform distribution that bench() samples strikes from.
XL, XH = 10.0, 50.0

# Bounds of the uniform distribution that bench() samples `t` from.
TL, TH = 1.0, 2.0

# Scalar rate and volatility handed to every pricing call made by bench().
RISK_FREE = 0.1
VOLATILITY = 0.2
def using_array_api(nopt, price, strike, t, rate, vol):
    """Build the Black-Scholes call/put computation from dask.array operations.

    Every arithmetic step is expressed as its own dask.array operation, so
    the resulting graph contains many small tasks per block.

    Parameters
    ----------
    nopt : unused here; kept so the signature matches ``using_map_blocks``.
    price, strike, t : array operands (``bench`` passes 1-d dask arrays).
    rate, vol : scalars applied to every element.

    Returns
    -------
    A dask array made by stacking ``(call, put)`` along a new leading axis.
    The order is the same as the one ``black_scholes`` returns, so both
    benchmarked implementations produce results with the same layout.
    """
    mr = -rate
    sig_sig_two = vol * vol * 2

    P = price
    S = strike
    T = t

    a = da.log(P / S)
    b = T * mr

    z = T * sig_sig_two
    c = 0.25 * z
    # Kept as an explicit map_blocks call (rather than a dask ufunc) so the
    # number of tasks in the benchmarked graph is unchanged.
    y = da.map_blocks(lambda x: 1.0 / np.sqrt(x), z)

    w1 = (a - b + c) * y
    w2 = (a - b - c) * y

    # erf comes from scipy, so it is applied block-wise.
    d1 = 0.5 + 0.5 * da.map_blocks(erf, w1)
    d2 = 0.5 + 0.5 * da.map_blocks(erf, w2)

    Se = da.exp(b) * S

    call = P * d1 - Se * d2
    put = call - P + Se

    # (call, put) -- previously (put, call), which disagreed with black_scholes.
    return da.stack((call, put))
def black_scholes(nopt, price, strike, t, rate, vol):
    """Evaluate Black-Scholes call and put values with NumPy.

    Parameters
    ----------
    nopt : unused; accepted so the signature lines up with the other
        implementations in this file.
    price, strike, t : array-like operands combined element-wise.
    rate, vol : scalars applied to every element.

    Returns
    -------
    ``np.stack((call, put))`` -- calls in row 0, puts in row 1.
    """
    neg_rate = -rate
    twice_variance = vol * vol * 2

    log_ratio = np.log(price / strike)
    rate_term = t * neg_rate

    scaled_variance = t * twice_variance
    shift = 0.25 * scaled_variance
    inv_root = 1.0 / np.sqrt(scaled_variance)

    # The two erf arguments share everything except the sign of `shift`.
    base = log_ratio - rate_term
    upper = 0.5 + 0.5 * erf((base + shift) * inv_root)
    lower = 0.5 + 0.5 * erf((base - shift) * inv_root)

    discounted_strike = np.exp(rate_term) * strike

    call = price * upper - discounted_strike * lower
    # The put is derived from the call rather than computed independently.
    put = call - price + discounted_strike

    return np.stack((call, put))
def using_map_blocks(nopt, price, strike, t, rate, vol):
    """Run ``black_scholes`` once per block through ``da.map_blocks``.

    All six arguments are forwarded to ``black_scholes`` unchanged, and
    ``new_axis=0`` tells dask that the function's output has an extra
    leading axis.
    """
    forwarded = (nopt, price, strike, t, rate, vol)
    return da.map_blocks(black_scholes, *forwarded, new_axis=0)
class BenchResult(namedtuple('BenchResult',
                             ['name', 'chunksize', 'numblocks', 'ntasks',
                              'ntasks_opt', 'build', 'optimize', 'compute',
                              'finalize', 'total'])):
    """Timing record for one benchmarked implementation.

    ``compute`` and ``finalize`` may be ``None``; in that case they are left
    out of the printed report.
    """

    def __repr__(self):
        """Return a multi-line, human-readable report of the record."""
        # One-element tuples keep %-formatting from unpacking a tuple value.
        report = [
            "%s\n" % (self.name,),
            "=======================\n",
            "chunksize: %d\n" % (self.chunksize,),
            "numblocks: %d\n" % (self.numblocks,),
            "-----------------------\n",
            "Graph Size: %d\n" % (self.ntasks,),
            "Optimized Size: %d\n" % (self.ntasks_opt,),
            "-----------------------\n",
            "Build: %.3f s\n" % (self.build,),
            "Optimize: %.3f s\n" % (self.optimize,),
        ]
        if self.compute is not None:
            report.append("Compute: %.3f s\n" % (self.compute,))
            report.append("Finalize: %.3f s\n" % (self.finalize,))
        report.append("Total: %.3f s" % (self.total,))
        return "".join(report)
def bench(nopt, chunksize, compute=True):
    """Time graph build, optimization and (optionally) execution.

    Random price, strike and t arrays of length ``nopt`` are created with
    chunks of ``chunksize``; then ``using_map_blocks`` and
    ``using_array_api`` are each timed on those same inputs.

    Parameters
    ----------
    nopt : length of each input array.
    chunksize : chunk length passed to ``da.random.uniform``.
    compute : when falsy, only build and optimize are timed and the
        ``compute``/``finalize`` fields of each result are ``None``.

    Returns
    -------
    A tuple of ``BenchResult`` records, ordered
    ``(using_map_blocks, using_array_api)``.

    NOTE(review): ``_keys``, ``_optimize`` and ``_finalize`` are
    underscore-prefixed methods of the dask array -- confirm they exist in
    the dask version this is run against.
    """
    def uniform(low, high):
        return da.random.uniform(low, high, nopt, chunks=chunksize)

    price = uniform(S0L, S0H)
    strike = uniform(XL, XH)
    t = uniform(TL, TH)

    records = []
    for impl in (using_map_blocks, using_array_api):
        started = default_timer()
        arr = impl(nopt, price, strike, t, RISK_FREE, VOLATILITY)
        built = default_timer()

        keys = arr._keys()
        graph = arr._optimize(arr.dask, keys)
        optimized = default_timer()

        if not compute:
            # Nothing is executed, so the run ends once optimization is done.
            compute_time = finalize_time = None
            ended = optimized
        else:
            chunks = get(graph, keys)
            computed = default_timer()
            arr._finalize(chunks)
            ended = default_timer()
            compute_time = computed - optimized
            finalize_time = ended - computed

        records.append(BenchResult(
            name=impl.__name__,
            chunksize=chunksize,
            numblocks=sum(price.numblocks),
            ntasks=len(arr.dask),
            ntasks_opt=len(graph),
            build=built - started,
            optimize=optimized - built,
            compute=compute_time,
            finalize=finalize_time,
            total=ended - started,
        ))
    return tuple(records)
if __name__ == '__main__':
    # Command-line entry point: run the benchmark and print one report per
    # implementation, each followed by a blank line.
    cli = argparse.ArgumentParser()
    # Sizes are parsed as floats so values such as "1e7" are accepted; they
    # are truncated to int before use.
    for flag, fallback in (('--nopt', 1e7), ('--chunksize', 1e5)):
        cli.add_argument(flag, required=False, default=fallback, type=float)
    # Two flags write to the same destination; computing is the default.
    cli.add_argument('--no-compute', action='store_false', dest='compute')
    cli.add_argument('--compute', action='store_true', dest='compute')
    cli.set_defaults(compute=True)
    options = cli.parse_args()

    reports = bench(int(options.nopt), int(options.chunksize),
                    compute=options.compute)
    for report in reports:
        print(report)
        print("")
# coding: utf-8
import matplotlib
matplotlib.use('TKAgg')
import matplotlib.pyplot as plt
from matplotlib import ticker as mtick
import pandas as pd
from bench import bench
# Scaling build across number of blocks.
# The chunk size is fixed at 1e5 and nothing is computed, so each `total`
# only covers building and optimizing the graph.
problem_sizes = [1e6, 1e7, 1e8, 1e9]
results = [bench(int(nopt), int(1e5), compute=False) for nopt in problem_sizes]

# bench() returns (using_map_blocks, using_array_api) for every size.
map_blocks_runs, array_api_runs = zip(*results)
data = {'map_blocks': [run.total for run in map_blocks_runs],
        'array_api': [run.total for run in array_api_runs],
        'numblocks': [run.numblocks for run in map_blocks_runs]}
df = pd.DataFrame(data)

df.plot.line(x='numblocks', title='Graph Build Time vs Number of Blocks',
             logy=True)
plt.ylabel('time (s)')
plt.savefig('numblocks.png')
# Each pair is merged into one column named after its first member:
# 'build' = build + optimize, 'compute' = compute + finalize.
_PHASE_PAIRS = (('build', 'optimize'), ('compute', 'finalize'))


def _phase_frame(runs, chunksizes):
    """Return one implementation's merged phase timings, indexed by chunksize."""
    columns = {first: [getattr(run, first) + getattr(run, second)
                       for run in runs]
               for first, second in _PHASE_PAIRS}
    return pd.DataFrame(columns, index=pd.Index(chunksizes, name='chunksize'))


def _timing_frame(results, chunksizes):
    """Combine both implementations' phase timings under a two-level column index."""
    # bench() returns (using_map_blocks, using_array_api) for every run.
    map_blocks_runs = [pair[0] for pair in results]
    array_api_runs = [pair[1] for pair in results]
    return pd.concat([_phase_frame(array_api_runs, chunksizes),
                      _phase_frame(map_blocks_runs, chunksizes)],
                     axis=1, keys=['array_api', 'map_blocks'])


def _plot_grouped_bars(df, chunksizes, suptitle, filename):
    """Draw one stacked-bar panel per chunksize and save the figure to `filename`."""
    # squeeze=False keeps `axes` two-dimensional for any number of panels.
    fig, axes = plt.subplots(1, len(chunksizes), figsize=(12, 6), sharey=True,
                             squeeze=False)
    panels = axes[0]
    df2 = df.stack().T
    for i, n in enumerate(chunksizes):
        # Only the first panel carries the legend.
        df2[n].plot(kind='bar', stacked=True, sharex=False, sharey=True,
                    ax=panels[i], title='%.0e' % n, legend=(not i), rot=False)
    panels[0].set_ylabel('time (s)')
    plt.suptitle(suptitle)
    plt.savefig(filename)


# Scaling compute across block size
chunksizes = [int(i) for i in [1e5, 5e5, 1e6]]
numblocks = 100
results = [bench(int(numblocks * chunksize), int(chunksize))
           for chunksize in chunksizes]
df = _timing_frame(results, chunksizes)

# Plot total time vs chunksize
total_time = df.T.groupby(level=0).sum().T
total_time.index = total_time.index.astype('f8')
ax = total_time.plot.line(title='Total Time vs Chunksize', logy=True, rot=45,
                          figsize=(6, 7))
ax.xaxis.set_major_formatter(mtick.FormatStrFormatter('%.0e'))
plt.ylabel('time (s)')
plt.savefig('chunksize.png')

# Grouped bar charts
_plot_grouped_bars(df, chunksizes,
                   'Varying Chunksize, Fixed Numblocks = %d' % numblocks,
                   'chunksize2.png')

# Fix size, varying chunksize (and thus numblocks)
chunksizes = [int(i) for i in [1e5, 5e5, 1e6]]
results = [bench(int(1e7), int(chunksize)) for chunksize in chunksizes]
df = _timing_frame(results, chunksizes)

# Grouped bar charts
_plot_grouped_bars(df, chunksizes,
                   'Varying Chunksize, Fixed Size = 1e7',
                   'chunksize3.png')
@jcrist
Copy link
Author

jcrist commented Jul 26, 2017

Resulting Plots:

numblocks

chunksize

chunksize2

chunksize3

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment