Created
July 26, 2017 22:15
-
-
Save jcrist/739001882738b1bde1125feb4ac3899e to your computer and use it in GitHub Desktop.
Benchmark Blackscholes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
from timeit import default_timer | |
from collections import namedtuple | |
import numpy as np | |
import dask.array as da | |
from dask.threaded import get | |
from scipy.special import erf | |
# Lower/upper bounds of the uniform distributions sampled in ``bench``.
S0L, S0H = 10.0, 50.0  # price
XL, XH = 10.0, 50.0    # strike
TL, TH = 1.0, 2.0      # t

# Scalars passed to every benchmarked function as ``rate`` and ``vol``.
RISK_FREE = 0.1
VOLATILITY = 0.2
def using_array_api(nopt, price, strike, t, rate, vol):
    """Price calls and puts using dask.array operations.

    Mirrors the arithmetic of ``black_scholes`` step by step, but builds it
    out of ``dask.array`` calls instead of running NumPy directly.

    Parameters
    ----------
    nopt
        Unused here; kept so the signature matches the other benchmarked
        functions.
    price, strike, t
        Arrays combined element-wise (``bench`` passes dask arrays).
    rate, vol
        Scalars applied to every element.

    Returns
    -------
    The result of ``da.stack((call, put))`` -- the same (call, put) order
    that ``black_scholes`` returns, so both benchmarked variants describe
    the same output.
    """
    mr = -rate
    sig_sig_two = vol * vol * 2

    P = price
    S = strike
    T = t

    a = da.log(P / S)
    b = T * mr

    z = T * sig_sig_two
    c = 0.25 * z
    # Reciprocal square root is applied block-wise with NumPy.
    y = da.map_blocks(lambda x: 1.0 / np.sqrt(x), z)

    w1 = (a - b + c) * y
    w2 = (a - b - c) * y

    # scipy's erf is applied block-wise.
    d1 = 0.5 + 0.5 * da.map_blocks(erf, w1)
    d2 = 0.5 + 0.5 * da.map_blocks(erf, w2)

    Se = da.exp(b) * S

    call = P * d1 - Se * d2
    put = call - P + Se

    # Order fixed to (call, put); it was previously (put, call), which
    # disagreed with black_scholes / using_map_blocks.
    return da.stack((call, put))
def black_scholes(nopt, price, strike, t, rate, vol):
    """Compute call and put prices with NumPy and scipy's ``erf``.

    Parameters
    ----------
    nopt
        Unused; kept so the signature matches the other benchmarked
        functions.
    price, strike, t
        Array-likes combined element-wise.
    rate, vol
        Scalars applied to every element.

    Returns
    -------
    ``np.stack((call, put))``: calls at index 0 and puts at index 1 of a
    new leading axis.
    """
    neg_rate = -rate
    two_variance = vol * vol * 2

    log_ratio = np.log(price / strike)
    neg_rate_time = t * neg_rate

    scaled_time = t * two_variance
    shift = 0.25 * scaled_time
    inv_root = 1.0 / np.sqrt(scaled_time)

    base = log_ratio - neg_rate_time
    cdf_d1 = 0.5 + 0.5 * erf((base + shift) * inv_root)
    cdf_d2 = 0.5 + 0.5 * erf((base - shift) * inv_root)

    discounted_strike = np.exp(neg_rate_time) * strike

    call = price * cdf_d1 - discounted_strike * cdf_d2
    put = call - price + discounted_strike

    return np.stack((call, put))
def using_map_blocks(nopt, price, strike, t, rate, vol):
    """Apply ``black_scholes`` to each block via ``da.map_blocks``.

    All six arguments are forwarded unchanged to ``black_scholes``;
    ``new_axis=0`` declares the leading axis that ``black_scholes`` adds
    when it stacks its two outputs.

    NOTE(review): no ``chunks=`` is passed even though each block gains a
    leading axis of length 2 -- confirm whether the resulting array's
    chunk metadata matters to callers.
    """
    forwarded = (nopt, price, strike, t, rate, vol)
    return da.map_blocks(black_scholes, *forwarded, new_axis=0)
class BenchResult(namedtuple('BenchResult',
                             ['name', 'chunksize', 'numblocks', 'ntasks',
                              'ntasks_opt', 'build', 'optimize', 'compute',
                              'finalize', 'total'])):
    """Timings and graph sizes recorded for one benchmarked function.

    ``compute`` and ``finalize`` are ``None`` when the compute step was
    skipped; every other timing field is a number of seconds.
    """

    # Keep instances as plain tuples: without this, subclassing a
    # namedtuple gives every instance its own ``__dict__``.
    __slots__ = ()

    def __repr__(self):
        """Return a multi-line, human-readable report of the result."""
        msg = ("%s\n"
               "=======================\n"
               "chunksize: %d\n"
               "numblocks: %d\n"
               "-----------------------\n"
               "Graph Size: %d\n"
               "Optimized Size: %d\n"
               "-----------------------\n"
               "Build: %.3f s\n"
               "Optimize: %.3f s\n") % (self.name, self.chunksize,
                                        self.numblocks, self.ntasks,
                                        self.ntasks_opt, self.build,
                                        self.optimize)
        # Compute/finalize lines appear only when the graph was executed.
        if self.compute is not None:
            msg += ("Compute: %.3f s\n"
                    "Finalize: %.3f s\n") % (self.compute, self.finalize)
        msg += ("Total: %.3f s" % self.total)
        return msg
def bench(nopt, chunksize, compute=True):
    """Time ``using_map_blocks`` and ``using_array_api`` on random inputs.

    Parameters
    ----------
    nopt
        Length of each random input array.
    chunksize
        Chunk size used for each input array.
    compute
        When true, the optimized graph is also executed with the threaded
        scheduler and finalized; otherwise only build and optimize are
        timed.

    Returns
    -------
    A tuple of ``BenchResult``, one per benchmarked function, in the order
    (using_map_blocks, using_array_api).
    """
    price = da.random.uniform(S0L, S0H, nopt, chunks=chunksize)
    strike = da.random.uniform(XL, XH, nopt, chunks=chunksize)
    expiry = da.random.uniform(TL, TH, nopt, chunks=chunksize)

    results = []
    for func in (using_map_blocks, using_array_api):
        # Phase 1: build the dask array.
        started = default_timer()
        arr = func(nopt, price, strike, expiry, RISK_FREE, VOLATILITY)
        built = default_timer()

        # Phase 2: optimize the graph for the output keys.
        keys = arr._keys()
        graph = arr._optimize(arr.dask, keys)
        optimized = default_timer()

        t_compute = None
        t_finalize = None
        finished = optimized
        if compute:
            # Phase 3: execute, then assemble the final result.
            parts = get(graph, keys)
            computed = default_timer()
            arr._finalize(parts)
            finished = default_timer()
            t_compute = computed - optimized
            t_finalize = finished - computed

        results.append(BenchResult(name=func.__name__,
                                   chunksize=chunksize,
                                   numblocks=sum(price.numblocks),
                                   ntasks=len(arr.dask),
                                   ntasks_opt=len(graph),
                                   build=built - started,
                                   optimize=optimized - built,
                                   compute=t_compute,
                                   finalize=t_finalize,
                                   total=finished - started))
    return tuple(results)
if __name__ == '__main__':
    # Sizes are parsed with type=float and truncated to int below, so
    # values written like 1e7 are accepted on the command line.
    cli = argparse.ArgumentParser()
    cli.add_argument('--nopt', required=False, default=1e7, type=float)
    cli.add_argument('--chunksize', required=False, default=1e5, type=float)
    # --compute / --no-compute both write to the same destination.
    cli.add_argument('--no-compute', action='store_false', dest='compute')
    cli.add_argument('--compute', action='store_true', dest='compute')
    cli.set_defaults(compute=True)
    options = cli.parse_args()

    # Print each result followed by a blank separator line.
    for result in bench(int(options.nopt), int(options.chunksize),
                        compute=options.compute):
        print(result)
        print("")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
import matplotlib | |
matplotlib.use('TKAgg') | |
import matplotlib.pyplot as plt | |
from matplotlib import ticker as mtick | |
import pandas as pd | |
from bench import bench | |
# Scaling build across number of blocks
# compute=False is passed, so the compute step is skipped for these runs.
results = []
for size in (1e6, 1e7, 1e8, 1e9):
    results.append(bench(int(size), int(1e5), compute=False))

# Each entry of `results` is a (map_blocks result, array_api result) pair.
data = {
    'map_blocks': [mb.total for mb, _ in results],
    'array_api': [api.total for _, api in results],
    'numblocks': [mb.numblocks for mb, _ in results],
}
df = pd.DataFrame(data)
df.plot.line(x='numblocks',
             title='Graph Build Time vs Number of Blocks',
             logy=True)
plt.ylabel('time (s)')
plt.savefig('numblocks.png')
# Scaling compute across block size
chunksizes = [int(size) for size in (1e5, 5e5, 1e6)]
numblocks = 100
# The total size grows with the chunk size so the block count stays fixed.
results = [bench(int(numblocks * size), int(size)) for size in chunksizes]

# Fold the four timings into two columns, each named after the first
# field of the pair: build + optimize, and compute + finalize.
map_blocks = {}
array_api = {}
for first, second in (('build', 'optimize'), ('compute', 'finalize')):
    map_blocks[first] = [getattr(mb, first) + getattr(mb, second)
                         for mb, _ in results]
    array_api[first] = [getattr(api, first) + getattr(api, second)
                        for _, api in results]

map_blocks = pd.DataFrame(map_blocks,
                          index=pd.Index(chunksizes, name='chunksize'))
array_api = pd.DataFrame(array_api,
                         index=pd.Index(chunksizes, name='chunksize'))
# Side-by-side frames under a two-level column index keyed by method.
df = pd.concat([array_api, map_blocks], axis=1,
               keys=['array_api', 'map_blocks'])
# Plot total time vs chunksize
# Transpose so the outer column level becomes index level 0, sum within
# each of its groups, then transpose back.
summed = df.T.groupby(level=0).sum()
total_time = summed.T
total_time.index = total_time.index.astype('f8')
ax = total_time.plot.line(
    title='Total Time vs Chunksize',
    logy=True,
    rot=45,
    figsize=(6, 7),
)
tick_format = mtick.FormatStrFormatter('%.0e')
ax.xaxis.set_major_formatter(tick_format)
plt.ylabel('time (s)')
plt.savefig('chunksize.png')
# Grouped bar charts
fig, axes = plt.subplots(1, 3, figsize=(12, 6), sharey=True)
df2 = df.stack().T
# Options shared by every panel; only the axis, title and legend vary.
bar_options = dict(kind='bar', stacked=True, sharex=False, sharey=True,
                   rot=False)
for idx, size in enumerate(chunksizes):
    # Only the first panel carries the legend.
    df2[size].plot(ax=axes[idx], title='%.0e' % size, legend=(idx == 0),
                   **bar_options)
axes[0].set_ylabel('time (s)')
plt.suptitle('Varying Chunksize, Fixed Numblocks = %d' % numblocks)
plt.savefig('chunksize2.png')
# Fix size, varying chunksize (and thus numblocks)
chunksizes = [int(size) for size in (1e5, 5e5, 1e6)]
results = [bench(int(1e7), int(size)) for size in chunksizes]

# Fold the four timings into two columns, each named after the first
# field of the pair: build + optimize, and compute + finalize.
map_blocks = {}
array_api = {}
for first, second in (('build', 'optimize'), ('compute', 'finalize')):
    map_blocks[first] = [getattr(mb, first) + getattr(mb, second)
                         for mb, _ in results]
    array_api[first] = [getattr(api, first) + getattr(api, second)
                        for _, api in results]

map_blocks = pd.DataFrame(map_blocks,
                          index=pd.Index(chunksizes, name='chunksize'))
array_api = pd.DataFrame(array_api,
                         index=pd.Index(chunksizes, name='chunksize'))
# Side-by-side frames under a two-level column index keyed by method.
df = pd.concat([array_api, map_blocks], axis=1,
               keys=['array_api', 'map_blocks'])
# Grouped bar charts
fig, axes = plt.subplots(1, 3, figsize=(12, 6), sharey=True)
df2 = df.stack().T
# Options shared by every panel; only the axis, title and legend vary.
bar_options = dict(kind='bar', stacked=True, sharex=False, sharey=True,
                   rot=False)
for idx, size in enumerate(chunksizes):
    # Only the first panel carries the legend.
    df2[size].plot(ax=axes[idx], title='%.0e' % size, legend=(idx == 0),
                   **bar_options)
axes[0].set_ylabel('time (s)')
plt.suptitle('Varying Chunksize, Fixed Size = 1e7')
plt.savefig('chunksize3.png')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Resulting Plots: