-
-
Save manodeep/cffd9a5d77510e43ccf0 to your computer and use it in GitHub Desktop.
## Timings with halotools v0.4 and Corrfunc v2.0.0. Updated on Sep 12 (random data, Corrfunc compiled with gcc6.0) | |
## Note this test is capped at 1 million because the first three codes take too long and halotools randomly subsamples | |
## particles above 1 million. The algorithm in halotools is an *exact* copy of `Corrfunc` - so the timing comparison simply | |
## tests the capability of the cython compiler. | |
## Timings generated on Oct 17, 2016, with mostly ready version of Corrfunc v2.0. Non-periodic calculations with random points. | |
## Npts kdcount cKDTree KDTree halotools Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback) | |
10000 0.039 0.057 0.087 0.026 0.240 0.053 0.052 | |
50000 0.381 0.399 0.936 0.121 0.226 0.224 0.214 | |
100000 1.080 1.104 2.757 0.383 0.390 0.386 0.414 | |
500000 14.308 14.114 44.586 7.828 2.689 3.252 4.319 | |
1000000 46.914 42.952 147.532 28.983 7.601 10.433 13.725 | |
## There was a bug in the function dispatcher and the fallback algorithm was really the AVX implementation. That's | |
## why the timings are so close between the AVX and fallback. Bug was fixed in https://github.com/manodeep/Corrfunc/commit/2ed855805414bc10300858499806b7bc07aa773d | |
## Npts kdcount cKDTree halotools Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback) | |
10000 0.050 0.153 0.030 0.223 0.031 0.030 | |
50000 0.451 0.696 0.123 0.101 0.105 0.109 | |
100000 1.265 1.925 0.359 0.219 0.249 0.281 | |
500000 16.541 23.059 6.735 2.687 3.583 4.990 | |
1000000 51.864 68.770 25.743 9.302 13.992 18.639 | |
## Version with dispatch bug. | |
## Npts kdcount cKDTree KDTree halotools Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback) | |
10000 0.052 0.157 0.149 0.028 0.230 0.031 0.027 | |
50000 0.443 0.700 0.717 0.118 0.095 0.104 0.092 | |
100000 1.292 1.898 1.889 0.344 0.202 0.240 0.200 | |
500000 16.257 21.738 21.595 6.374 2.465 3.459 2.482 | |
1000000 48.638 66.999 70.141 26.415 9.326 13.539 9.860 | |
## Previous version -> done at Salt Lake City, UT (Galaxy-Halo Conference, Mar 2016) | |
## Npts kdcount cKDTree KDTree halotools Corrfunc(AVX) Corrfunc(SSE) Corrfunc(Naive) | |
80000 0.947 1.518 1.569 2.109 0.183 0.194 0.210 | |
100000 1.324 1.990 2.012 2.821 0.240 0.267 0.298 | |
500000 16.790 23.291 23.151 51.567 2.872 3.902 5.146 | |
1000000 52.749 71.471 71.354 203.456 10.165 14.473 19.607 | |
from __future__ import print_function | |
import numpy as np | |
try: | |
import kdcount | |
except: | |
kdcount = None | |
try: | |
from scipy.spatial import cKDTree | |
except ImportError: | |
cKDTree = None | |
from halotools.mock_observables.pair_counters import npairs_3d \ | |
as halotools_npairs | |
from Corrfunc.theory.DD import DD | |
import time | |
try: | |
from sklearn.neighbors import KDTree | |
except ImportError: | |
KDTree = None | |
def kdcount_timing(data1, data2, rbins, period): | |
tree1 = kdcount.KDTree(data1).root | |
tree2 = kdcount.KDTree(data2).root | |
return tree1.count(tree2, r=rbins) | |
def halotools_timing(data1, data2, rbins, period): | |
return halotools_npairs(data1, data2, rbins) | |
def cKDTree_timing(data1, data2, rbins, period): | |
tree1 = cKDTree(data1) | |
tree2 = cKDTree(data2) | |
return tree1.count_neighbors(tree2, rbins) | |
def KDTree_timing(data1, data2, rbins, period): | |
tree1 = KDTree(data1) | |
return tree1.two_point_correlation(data2, rbins) | |
def corrfunc_timing(data1, data2, rbins, period): | |
x1 = data1[:, 0] | |
y1 = data1[:, 1] | |
z1 = data1[:, 2] | |
x2 = data2[:, 0] | |
y2 = data2[:, 1] | |
z2 = data2[:, 2] | |
nthreads = 1 | |
autocorr = 0 | |
return DD(autocorr, nthreads, rbins, | |
x1, y1, z1, | |
X2=x2, Y2=y2, Z2=z2, isa='avx', | |
periodic=False) | |
def corrfunc_timing_sse(data1, data2, rbins, period): | |
x1 = data1[:, 0] | |
y1 = data1[:, 1] | |
z1 = data1[:, 2] | |
x2 = data2[:, 0] | |
y2 = data2[:, 1] | |
z2 = data2[:, 2] | |
nthreads = 1 | |
autocorr = 0 | |
return DD(autocorr, nthreads, rbins, | |
x1, y1, z1, | |
X2=x2, Y2=y2, Z2=z2, isa='sse42', | |
periodic=False) | |
def corrfunc_timing_fallback(data1, data2, rbins, period): | |
x1 = data1[:, 0] | |
y1 = data1[:, 1] | |
z1 = data1[:, 2] | |
x2 = data2[:, 0] | |
y2 = data2[:, 1] | |
z2 = data2[:, 2] | |
nthreads = 1 | |
autocorr = 0 | |
return DD(autocorr, nthreads, rbins, | |
x1, y1, z1, | |
X2=x2, Y2=y2, Z2=z2, isa='fallback', | |
periodic=False) | |
def main(): | |
functions_to_test = [] | |
function_names = [] | |
if kdcount is not None: | |
functions_to_test.append(kdcount_timing) | |
function_names.append("kdcount") | |
if cKDTree is not None: | |
functions_to_test.append(cKDTree_timing) | |
function_names.append("cKDTree") | |
if KDTree is not None: | |
functions_to_test.append(KDTree_timing) | |
function_names.append("KDTree") | |
functions_to_test.append(halotools_timing) | |
function_names.append("halotools") | |
functions_to_test.append(corrfunc_timing) | |
function_names.append("Corrfunc(AVX)") | |
functions_to_test.append(corrfunc_timing_sse) | |
function_names.append("Corrfunc(SSE42)") | |
functions_to_test.append(corrfunc_timing_fallback) | |
function_names.append("Corrfunc(fallback)") | |
clustered_data = 0 | |
if clustered_data: | |
from Corrfunc.io import read_catalog | |
x, y, z = read_catalog() | |
boxsize = 420.0 | |
x /= boxsize | |
y /= boxsize | |
z /= boxsize | |
npts = [1e4, 5e4, 1e5, 5e5, 1e6] | |
npts = [int(i) for i in npts] | |
print("## Npts ", end="") | |
for ifunc in function_names: | |
print(" {0:10s}".format(ifunc), end="") | |
print("") | |
for n in npts: | |
npts1 = n | |
npts2 = n | |
if clustered_data == 0: | |
data1 = np.random.random(npts1*3).reshape(npts1, 3) | |
data2 = np.random.random(npts2*3).reshape(npts2, 3) | |
else: | |
newx1 = np.random.choice(x, npts1, replace=False) | |
newy1 = np.random.choice(y, npts1, replace=False) | |
newz1 = np.random.choice(z, npts1, replace=False) | |
data1 = np.array([newx1, newy1, newz1]).transpose() | |
newx2 = np.random.choice(x, npts2, replace=False) | |
newy2 = np.random.choice(y, npts2, replace=False) | |
newz2 = np.random.choice(z, npts2, replace=False) | |
data2 = np.array([newx2, newy2, newz2]).transpose() | |
boxsize = 1.0 | |
rbins = np.linspace(0.001*boxsize, 0.05*boxsize, 10) | |
if data1.max() > boxsize or data2.max() > boxsize: | |
msg = "Max data1 = {0} or max data2 = {1} must be less than "\ | |
"boxsize = {2}".format(data1.max(), data2.max(), boxsize) | |
raise ValueError(msg) | |
print("{0:7d}".format(n), end="") | |
for ii, func_name in enumerate(function_names): | |
t0 = time.time() | |
functions_to_test[ii](data1, data2, rbins, boxsize) | |
t1 = time.time() | |
print(" {0:7.3f} ".format(t1-t0), end="") | |
print("") | |
if __name__ == '__main__': | |
main() |
@aphearin @rainwoodman -- here's the python code I used to benchmark.
The function defined at line 42 seems to use cKDTree instead of KDTree.
Yup. I removed KDTree completely from the benchmark - the timings were virtually identical between kdtree
and ckdtree
.
Ohh I just realized the bug, I have ckdtree
rather than kdtree
in the kdtree
function.
Adding TreeCorr to this?
I added TreeCorr. TreeCorr only do log bins, so I converted the bins to logscale for a fair comparison.
from __future__ import print_function
import numpy as np
try:
import kdcount
except:
kdcount = None
try:
import treecorr
except:
treecorr = None
try:
from scipy.spatial import cKDTree
except ImportError:
cKDTree = None
from halotools.mock_observables.pair_counters import npairs_3d \
as halotools_npairs
from Corrfunc.theory.DD import DD
import time
try:
from sklearn.neighbors import KDTree
except ImportError:
KDTree = None
def kdcount_timing(data1, data2, rbins, period):
tree1 = kdcount.KDTree(data1).root
tree2 = kdcount.KDTree(data2).root
return tree1.count(tree2, r=rbins)
def halotools_timing(data1, data2, rbins, period):
return halotools_npairs(data1, data2, rbins)
def treecorr_timing(data1, data2, rbins, period):
cat1 = treecorr.Catalog(x=data1[:, 0], y=data1[:, 1], z=data1[:, 2])
cat2 = treecorr.Catalog(x=data2[:, 0], y=data2[:, 1], z=data2[:, 2])
nn = treecorr.NNCorrelation(nbins=len(rbins), min_sep=rbins[0], max_sep=rbins[-1])
return nn.process(cat1, cat2, num_threads=1)
def cKDTree_timing(data1, data2, rbins, period):
tree1 = cKDTree(data1)
tree2 = cKDTree(data2)
return tree1.count_neighbors(tree2, rbins)
def KDTree_timing(data1, data2, rbins, period):
tree1 = KDTree(data1)
return tree1.two_point_correlation(data2, rbins)
def corrfunc_timing(data1, data2, rbins, period):
x1 = data1[:, 0]
y1 = data1[:, 1]
z1 = data1[:, 2]
x2 = data2[:, 0]
y2 = data2[:, 1]
z2 = data2[:, 2]
nthreads = 1
autocorr = 0
return DD(autocorr, nthreads, rbins,
x1, y1, z1,
X2=x2, Y2=y2, Z2=z2, isa='avx',
periodic=False)
def corrfunc_timing_sse(data1, data2, rbins, period):
x1 = data1[:, 0]
y1 = data1[:, 1]
z1 = data1[:, 2]
x2 = data2[:, 0]
y2 = data2[:, 1]
z2 = data2[:, 2]
nthreads = 1
autocorr = 0
return DD(autocorr, nthreads, rbins,
x1, y1, z1,
X2=x2, Y2=y2, Z2=z2, isa='sse42',
periodic=False)
def corrfunc_timing_fallback(data1, data2, rbins, period):
x1 = data1[:, 0]
y1 = data1[:, 1]
z1 = data1[:, 2]
x2 = data2[:, 0]
y2 = data2[:, 1]
z2 = data2[:, 2]
nthreads = 1
autocorr = 0
return DD(autocorr, nthreads, rbins,
x1, y1, z1,
X2=x2, Y2=y2, Z2=z2, isa='fallback',
periodic=False)
def main():
functions_to_test = []
function_names = []
if kdcount is not None:
functions_to_test.append(kdcount_timing)
function_names.append("kdcount")
if cKDTree is not None:
functions_to_test.append(cKDTree_timing)
function_names.append("cKDTree")
if KDTree is not None:
functions_to_test.append(KDTree_timing)
function_names.append("KDTree")
if treecorr is not None:
functions_to_test.append(treecorr_timing)
function_names.append("TreeCorr")
functions_to_test.append(halotools_timing)
function_names.append("halotools")
functions_to_test.append(corrfunc_timing)
function_names.append("Corrfunc(AVX)")
functions_to_test.append(corrfunc_timing_sse)
function_names.append("Corrfunc(SSE42)")
functions_to_test.append(corrfunc_timing_fallback)
function_names.append("Corrfunc(fallback)")
clustered_data = 0
if clustered_data:
from Corrfunc.io import read_catalog
x, y, z = read_catalog()
boxsize = 420.0
x /= boxsize
y /= boxsize
z /= boxsize
npts = [1e4, 5e4, 1e5, 5e5, 1e6]
npts = [int(i) for i in npts]
print("## Npts ", end="")
for ifunc in function_names:
print(" {0:10s}".format(ifunc), end="")
print("")
for n in npts:
npts1 = n
npts2 = n
if clustered_data == 0:
data1 = np.random.random(npts1*3).reshape(npts1, 3)
data2 = np.random.random(npts2*3).reshape(npts2, 3)
else:
newx1 = np.random.choice(x, npts1, replace=False)
newy1 = np.random.choice(y, npts1, replace=False)
newz1 = np.random.choice(z, npts1, replace=False)
data1 = np.array([newx1, newy1, newz1]).transpose()
newx2 = np.random.choice(x, npts2, replace=False)
newy2 = np.random.choice(y, npts2, replace=False)
newz2 = np.random.choice(z, npts2, replace=False)
data2 = np.array([newx2, newy2, newz2]).transpose()
boxsize = 1.0
rbins = np.logspace(-3, np.log10(0.05), 10) * boxsize
if data1.max() > boxsize or data2.max() > boxsize:
msg = "Max data1 = {0} or max data2 = {1} must be less than "\
"boxsize = {2}".format(data1.max(), data2.max(), boxsize)
raise ValueError(msg)
print("{0:7d}".format(n), end="")
for ii, func_name in enumerate(function_names):
t0 = time.time()
functions_to_test[ii](data1, data2, rbins, boxsize)
t1 = time.time()
print(" {0:7.3f} ".format(t1-t0), end="")
print("")
if __name__ == '__main__':
main()
Benchmark on my computer for reference.
## Npts kdcount cKDTree KDTree TreeCorr halotools Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback)
10000 0.030 0.036 0.056 0.040 0.045 0.036 0.033 0.028
50000 0.254 0.257 0.591 0.266 0.113 0.193 0.178 0.155
100000 0.754 0.700 1.536 0.668 0.288 0.340 0.324 0.315
500000 9.333 7.780 26.734 9.304 5.089 2.069 2.234 2.733
1000000 27.889 23.492 89.310 29.456 18.926 5.362 6.534 8.848
@rainwoodman - thanks for the new timings! I did not notice your comments until now.
I need to run a new timing test, including the new Corrfunc AVX512F kernels as well.
Here is the code I used:
from __future__ import print_function
import numpy as np
try:
import kdcount
except:
kdcount = None
try:
import treecorr
except:
treecorr = None
try:
from scipy.spatial import cKDTree
except ImportError:
cKDTree = None
from halotools.mock_observables.pair_counters import npairs_3d \
as halotools_npairs
from Corrfunc.theory.DD import DD
import time
try:
from sklearn.neighbors import KDTree
except ImportError:
KDTree = None
def kdcount_timing(data1, data2, rbins, period):
tree1 = kdcount.KDTree(data1).root
tree2 = kdcount.KDTree(data2).root
return tree1.count(tree2, r=rbins)
def halotools_timing(data1, data2, rbins, period):
return halotools_npairs(data1, data2, rbins)
def treecorr_timing(data1, data2, rbins, period):
cat1 = treecorr.Catalog(x=data1[:, 0], y=data1[:, 1], z=data1[:, 2])
cat2 = treecorr.Catalog(x=data2[:, 0], y=data2[:, 1], z=data2[:, 2])
nn = treecorr.NNCorrelation(nbins=len(rbins), min_sep=rbins[0], max_sep=rbins[-1])
return nn.process(cat1, cat2, num_threads=1)
def cKDTree_timing(data1, data2, rbins, period):
tree1 = cKDTree(data1)
tree2 = cKDTree(data2)
return tree1.count_neighbors(tree2, rbins)
def KDTree_timing(data1, data2, rbins, period):
tree1 = KDTree(data1)
return tree1.two_point_correlation(data2, rbins)
def corrfunc_timing(data1, data2, rbins, period):
x1 = data1[:, 0]
y1 = data1[:, 1]
z1 = data1[:, 2]
x2 = data2[:, 0]
y2 = data2[:, 1]
z2 = data2[:, 2]
nthreads = 1
autocorr = 0
return DD(autocorr, nthreads, rbins,
x1, y1, z1,
X2=x2, Y2=y2, Z2=z2, isa='avx512f',
periodic=False)
def corrfunc_timing_avx(data1, data2, rbins, period):
x1 = data1[:, 0]
y1 = data1[:, 1]
z1 = data1[:, 2]
x2 = data2[:, 0]
y2 = data2[:, 1]
z2 = data2[:, 2]
nthreads = 1
autocorr = 0
return DD(autocorr, nthreads, rbins,
x1, y1, z1,
X2=x2, Y2=y2, Z2=z2, isa='avx',
periodic=False)
def corrfunc_timing_sse(data1, data2, rbins, period):
x1 = data1[:, 0]
y1 = data1[:, 1]
z1 = data1[:, 2]
x2 = data2[:, 0]
y2 = data2[:, 1]
z2 = data2[:, 2]
nthreads = 1
autocorr = 0
return DD(autocorr, nthreads, rbins,
x1, y1, z1,
X2=x2, Y2=y2, Z2=z2, isa='sse42',
periodic=False)
def corrfunc_timing_fallback(data1, data2, rbins, period):
x1 = data1[:, 0]
y1 = data1[:, 1]
z1 = data1[:, 2]
x2 = data2[:, 0]
y2 = data2[:, 1]
z2 = data2[:, 2]
nthreads = 1
autocorr = 0
return DD(autocorr, nthreads, rbins,
x1, y1, z1,
X2=x2, Y2=y2, Z2=z2, isa='fallback',
periodic=False)
def main():
functions_to_test = []
function_names = []
function_versions = []
if kdcount is not None:
functions_to_test.append(kdcount_timing)
function_names.append("kdcount")
function_versions.append(kdcount.__version__)
if cKDTree is not None:
functions_to_test.append(cKDTree_timing)
function_names.append("cKDTree")
from scipy import __version__ as scipy_ver
function_versions.append(scipy_ver)
if KDTree is not None:
functions_to_test.append(KDTree_timing)
from sklearn import __version__ as sklearn_ver
function_names.append("KDTree")
function_versions.append(sklearn_ver)
if treecorr is not None:
functions_to_test.append(treecorr_timing)
function_names.append("TreeCorr")
function_versions.append(treecorr.__version__)
import halotools
functions_to_test.append(halotools_timing)
function_names.append("halotools")
function_versions.append(halotools.__version__)
import Corrfunc as corrfunc
functions_to_test.append(corrfunc_timing)
function_names.append("Corrfunc(AVX512f)")
function_versions.append(corrfunc.__version__)
functions_to_test.append(corrfunc_timing_avx)
function_names.append("Corrfunc(AVX)")
function_versions.append(corrfunc.__version__)
functions_to_test.append(corrfunc_timing_sse)
function_names.append("Corrfunc(SSE42)")
function_versions.append(corrfunc.__version__)
functions_to_test.append(corrfunc_timing_fallback)
function_names.append("Corrfunc(fallback)")
function_versions.append(corrfunc.__version__)
for fn, fn_version in zip(function_names, function_versions):
print("## {} version = {}".format(fn, fn_version))
for clustered_data in [0, 1]:
if clustered_data:
from Corrfunc.io import read_catalog
x, y, z = read_catalog()
boxsize = 420.0
x /= boxsize
y /= boxsize
z /= boxsize
npts = [1e4, 5e4, 1e5, 5e5, 1e6]
npts = [int(i) for i in npts]
boxsize = 1.0
rbins = np.logspace(-3, np.log10(0.05), 10) * boxsize
print("## boxsize = {}. Bins are :".format(boxsize))
for ll, uu in zip(rbins[0:-2], rbins[1:]):
print("## {:0.5e} {:0.5e} ".format(ll, uu))
print("## clustered data = {}".format(clustered_data))
print("## Npts ", end="")
for ifunc in function_names:
print(" {:14s} ".format(ifunc), end="")
print("")
for n in npts:
npts1 = n
npts2 = n
if clustered_data == 0:
data1 = np.random.random(npts1*3).reshape(npts1, 3)
data2 = np.random.random(npts2*3).reshape(npts2, 3)
else:
newx1 = np.random.choice(x, npts1, replace=False)
newy1 = np.random.choice(y, npts1, replace=False)
newz1 = np.random.choice(z, npts1, replace=False)
data1 = np.array([newx1, newy1, newz1]).transpose()
newx2 = np.random.choice(x, npts2, replace=False)
newy2 = np.random.choice(y, npts2, replace=False)
newz2 = np.random.choice(z, npts2, replace=False)
data2 = np.array([newx2, newy2, newz2]).transpose()
if data1.max() > boxsize or data2.max() > boxsize:
msg = "Max data1 = {0} or max data2 = {1} must be less than "\
"boxsize = {2}".format(data1.max(), data2.max(), boxsize)
raise ValueError(msg)
print("{0:7d}".format(n), end="")
for ii, func_name in enumerate(function_names):
t0 = time.time()
functions_to_test[ii](data1, data2, rbins, boxsize)
t1 = time.time()
print(" {0:14.3f} ".format(t1-t0), end="", flush=True)
print("")
if __name__ == '__main__':
main()
## kdcount version = 0.3.29
## cKDTree version = 1.2.0
## TreeCorr version = 3.3.11
## halotools version = 0.6
## Corrfunc(AVX512f) version = 2.3.0
## Corrfunc(AVX) version = 2.3.0
## Corrfunc(SSE42) version = 2.3.0
## Corrfunc(fallback) version = 2.3.0
## boxsize = 1.0. Bins are :
## 1.00000e-03 1.54445e-03
## 1.54445e-03 2.38533e-03
## 2.38533e-03 3.68403e-03
## 3.68403e-03 5.68981e-03
## 5.68981e-03 8.78764e-03
## 8.78764e-03 1.35721e-02
## 1.35721e-02 2.09614e-02
## 2.09614e-02 3.23739e-02
## 3.23739e-02 5.00000e-02
## clustered data = 0
## Npts kdcount cKDTree TreeCorr halotools Corrfunc(AVX512f) Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback)
10000 0.035 0.034 0.032 0.025 0.071 0.060 0.056 0.056
50000 0.304 0.262 0.251 0.100 0.216 0.225 0.221 0.221
100000 0.865 0.745 0.721 0.273 0.378 0.410 0.411 0.425
500000 10.709 9.459 10.123 4.541 1.644 2.003 2.410 2.686
1000000 33.051 29.545 33.107 17.109 3.768 5.093 6.613 7.946
## boxsize = 1.0. Bins are :
## 1.00000e-03 1.54445e-03
## 1.54445e-03 2.38533e-03
## 2.38533e-03 3.68403e-03
## 3.68403e-03 5.68981e-03
## 5.68981e-03 8.78764e-03
## 8.78764e-03 1.35721e-02
## 1.35721e-02 2.09614e-02
## 2.09614e-02 3.23739e-02
## 3.23739e-02 5.00000e-02
## clustered data = 1
## Npts kdcount cKDTree TreeCorr halotools Corrfunc(AVX512f) Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback)
10000 0.034 0.034 0.031 0.025 0.043 0.041 0.041 0.041
50000 0.307 0.260 0.239 0.101 0.169 0.176 0.172 0.173
100000 0.869 0.741 0.695 0.270 0.322 0.359 0.353 0.368
500000 10.799 9.451 9.990 4.549 1.600 1.989 2.446 2.691
1000000 33.221 29.097 32.744 17.303 3.839 5.111 6.686 8.074
Looks like no difference between clustered vs random data.
Requires halotools, kdcount, scipy and my code Corrfunc. All calculations are in double precision. Wrote down the AVX numbers by hand.