Last active
January 25, 2019 04:57
-
-
Save manodeep/cffd9a5d77510e43ccf0 to your computer and use it in GitHub Desktop.
Benchmarking Pairwise Separation Codes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Timings with halotools v0.4 and Corrfunc v2.0.0. Updated on Sep 12 (random data, Corrfunc compiled with gcc6.0) | |
## Note this test is capped at 1 million because the first three codes take too long and halotools randomly subsamples | |
## particles above 1 million. The algorithm in halotools is an *exact* copy of `Corrfunc` - so the timing comparison simply | |
## tests the capability of the cython compiler. | |
## Timings generated on Oct 17, 2016, with mostly ready version of Corrfunc v2.0. Non-periodic calculations with random points. | |
## Npts kdcount cKDTree KDTree halotools Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback) | |
10000 0.039 0.057 0.087 0.026 0.240 0.053 0.052 | |
50000 0.381 0.399 0.936 0.121 0.226 0.224 0.214 | |
100000 1.080 1.104 2.757 0.383 0.390 0.386 0.414 | |
500000 14.308 14.114 44.586 7.828 2.689 3.252 4.319 | |
1000000 46.914 42.952 147.532 28.983 7.601 10.433 13.725 | |
## There was a bug in the function dispatcher and the fallback algorithm was really the AVX implementation. That's | |
## why the timings are so close between the AVX and fallback. Bug was fixed in https://github.com/manodeep/Corrfunc/commit/2ed855805414bc10300858499806b7bc07aa773d | |
## Npts kdcount cKDTree halotools Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback) | |
10000 0.050 0.153 0.030 0.223 0.031 0.030 | |
50000 0.451 0.696 0.123 0.101 0.105 0.109 | |
100000 1.265 1.925 0.359 0.219 0.249 0.281 | |
500000 16.541 23.059 6.735 2.687 3.583 4.990 | |
1000000 51.864 68.770 25.743 9.302 13.992 18.639 | |
## Version with dispatch bug. | |
## Npts kdcount cKDTree KDTree halotools Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback) | |
10000 0.052 0.157 0.149 0.028 0.230 0.031 0.027 | |
50000 0.443 0.700 0.717 0.118 0.095 0.104 0.092 | |
100000 1.292 1.898 1.889 0.344 0.202 0.240 0.200 | |
500000 16.257 21.738 21.595 6.374 2.465 3.459 2.482 | |
1000000 48.638 66.999 70.141 26.415 9.326 13.539 9.860 | |
## Previous version -> done at Salt Lake City, UT (Galaxy-Halo Conference, Mar 2016) | |
## Npts kdcount cKDTree KDTree halotools Corrfunc(AVX) Corrfunc(SSE) Corrfunc(Naive) | |
80000 0.947 1.518 1.569 2.109 0.183 0.194 0.210 | |
100000 1.324 1.990 2.012 2.821 0.240 0.267 0.298 | |
500000 16.790 23.291 23.151 51.567 2.872 3.902 5.146 | |
1000000 52.749 71.471 71.354 203.456 10.165 14.473 19.607 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function

import time

import numpy as np

# Optional pair-counting backends: each is benchmarked only when its
# import succeeds; otherwise the module name is set to None and skipped.
try:
    import kdcount
except ImportError:
    # BUGFIX: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt; catch ImportError like the other optional imports.
    kdcount = None
try:
    from scipy.spatial import cKDTree
except ImportError:
    cKDTree = None
try:
    from sklearn.neighbors import KDTree
except ImportError:
    KDTree = None

# Required backends (no fallback: the benchmark needs these).
from halotools.mock_observables.pair_counters import npairs_3d \
    as halotools_npairs
from Corrfunc.theory.DD import DD
def kdcount_timing(data1, data2, rbins, period):
    """Cross pair counts between two point sets at radii *rbins* via kdcount.

    *period* is accepted only for interface parity with the other timing
    helpers; the count here is non-periodic.
    """
    root_a = kdcount.KDTree(data1).root
    root_b = kdcount.KDTree(data2).root
    return root_a.count(root_b, r=rbins)
def halotools_timing(data1, data2, rbins, period):
    """Thin wrapper dispatching to halotools' npairs_3d (*period* ignored)."""
    return halotools_npairs(data1, data2, rbins)
def cKDTree_timing(data1, data2, rbins, period):
    """Count cross pairs within each radius in *rbins* using scipy's cKDTree.

    Builds one tree per point set and delegates to ``count_neighbors``.
    *period* is unused (non-periodic count); kept for a uniform signature.
    """
    first, second = cKDTree(data1), cKDTree(data2)
    return first.count_neighbors(second, rbins)
def KDTree_timing(data1, data2, rbins, period):
    """Two-point correlation counts from scikit-learn's KDTree (*period* ignored)."""
    tree = KDTree(data1)
    return tree.two_point_correlation(data2, rbins)
def corrfunc_timing(data1, data2, rbins, period):
    """Cross pair counts via Corrfunc's DD using the AVX kernel.

    Splits the (N, 3) arrays into coordinate columns and runs a
    single-threaded, non-periodic cross-correlation (*period* ignored).
    """
    x1, y1, z1 = data1[:, 0], data1[:, 1], data1[:, 2]
    x2, y2, z2 = data2[:, 0], data2[:, 1], data2[:, 2]
    # autocorr=0 -> cross-correlation; nthreads=1 for a fair timing.
    return DD(0, 1, rbins,
              x1, y1, z1,
              X2=x2, Y2=y2, Z2=z2,
              isa='avx', periodic=False)
def corrfunc_timing_sse(data1, data2, rbins, period):
    """Cross pair counts via Corrfunc's DD using the SSE4.2 kernel.

    Identical to :func:`corrfunc_timing` except for the instruction set:
    single thread, non-periodic cross-correlation (*period* ignored).
    """
    x1, y1, z1 = data1[:, 0], data1[:, 1], data1[:, 2]
    x2, y2, z2 = data2[:, 0], data2[:, 1], data2[:, 2]
    # autocorr=0 -> cross-correlation; nthreads=1 for a fair timing.
    return DD(0, 1, rbins,
              x1, y1, z1,
              X2=x2, Y2=y2, Z2=z2,
              isa='sse42', periodic=False)
def corrfunc_timing_fallback(data1, data2, rbins, period):
    """Cross pair counts via Corrfunc's DD using the portable fallback kernel.

    Identical to :func:`corrfunc_timing` except for the instruction set:
    single thread, non-periodic cross-correlation (*period* ignored).
    """
    x1, y1, z1 = data1[:, 0], data1[:, 1], data1[:, 2]
    x2, y2, z2 = data2[:, 0], data2[:, 1], data2[:, 2]
    # autocorr=0 -> cross-correlation; nthreads=1 for a fair timing.
    return DD(0, 1, rbins,
              x1, y1, z1,
              X2=x2, Y2=y2, Z2=z2,
              isa='fallback', periodic=False)
def main():
    """Benchmark the available pair-counting backends over increasing N.

    Prints one table row per particle count (1e4 .. 1e6) and one column per
    backend (kdcount, cKDTree, KDTree, halotools, and three Corrfunc
    instruction-set variants), each cell being wall-clock seconds per call.
    Optional backends whose import failed are skipped.
    """
    functions_to_test = []
    function_names = []
    if kdcount is not None:
        functions_to_test.append(kdcount_timing)
        function_names.append("kdcount")
    if cKDTree is not None:
        functions_to_test.append(cKDTree_timing)
        function_names.append("cKDTree")
    if KDTree is not None:
        functions_to_test.append(KDTree_timing)
        function_names.append("KDTree")
    functions_to_test.append(halotools_timing)
    function_names.append("halotools")
    functions_to_test.append(corrfunc_timing)
    function_names.append("Corrfunc(AVX)")
    functions_to_test.append(corrfunc_timing_sse)
    function_names.append("Corrfunc(SSE42)")
    functions_to_test.append(corrfunc_timing_fallback)
    function_names.append("Corrfunc(fallback)")

    # Toggle: 0 -> uniform-random points, 1 -> subsample a clustered catalog.
    clustered_data = 0
    if clustered_data:
        from Corrfunc.io import read_catalog
        x, y, z = read_catalog()
        boxsize = 420.0
        # Normalize the catalog into the unit box used below.
        x /= boxsize
        y /= boxsize
        z /= boxsize

    npts = [int(i) for i in (1e4, 5e4, 1e5, 5e5, 1e6)]
    print("## Npts ", end="")
    for ifunc in function_names:
        print(" {0:10s}".format(ifunc), end="")
    print("")
    for n in npts:
        npts1 = n
        npts2 = n
        if clustered_data == 0:
            data1 = np.random.random(npts1*3).reshape(npts1, 3)
            data2 = np.random.random(npts2*3).reshape(npts2, 3)
        else:
            # BUGFIX: subsample whole *points* with one shared index draw.
            # The previous code drew x, y and z independently, which
            # scrambles the 3-D positions and destroys the clustering the
            # catalog is supposed to provide (making "clustered" timings
            # indistinguishable from random ones).
            idx1 = np.random.choice(len(x), npts1, replace=False)
            data1 = np.array([x[idx1], y[idx1], z[idx1]]).transpose()
            idx2 = np.random.choice(len(x), npts2, replace=False)
            data2 = np.array([x[idx2], y[idx2], z[idx2]]).transpose()
        boxsize = 1.0
        rbins = np.linspace(0.001*boxsize, 0.05*boxsize, 10)
        # Sanity check: every backend assumes points live inside the box.
        if data1.max() > boxsize or data2.max() > boxsize:
            msg = "Max data1 = {0} or max data2 = {1} must be less than "\
                "boxsize = {2}".format(data1.max(), data2.max(), boxsize)
            raise ValueError(msg)
        print("{0:7d}".format(n), end="")
        for ii, func_name in enumerate(function_names):
            t0 = time.time()
            functions_to_test[ii](data1, data2, rbins, boxsize)
            t1 = time.time()
            print(" {0:7.3f} ".format(t1-t0), end="")
        print("")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here is the code I used:
There appears to be no timing difference between the clustered and the random data.