Skip to content

Instantly share code, notes, and snippets.

@manodeep
Last active January 25, 2019 04:57
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save manodeep/cffd9a5d77510e43ccf0 to your computer and use it in GitHub Desktop.
Save manodeep/cffd9a5d77510e43ccf0 to your computer and use it in GitHub Desktop.
Benchmarking Pairwise Separation Codes
## Timings with halotools v0.4 and Corrfunc v2.0.0. Updated on Sep 12 (random data, Corrfunc compiled with gcc6.0)
## Note this test is capped at 1 million because the first three codes take too long and halotools randomly subsamples
## particles above 1 million. The algorithm in halotools is an *exact* copy of `Corrfunc` - so the timing comparison simply
## tests the capability of the cython compiler.
## Timings generated on Oct 17, 2016, with mostly ready version of Corrfunc v2.0. Non-periodic calculations with random points.
## Npts kdcount cKDTree KDTree halotools Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback)
10000 0.039 0.057 0.087 0.026 0.240 0.053 0.052
50000 0.381 0.399 0.936 0.121 0.226 0.224 0.214
100000 1.080 1.104 2.757 0.383 0.390 0.386 0.414
500000 14.308 14.114 44.586 7.828 2.689 3.252 4.319
1000000 46.914 42.952 147.532 28.983 7.601 10.433 13.725
## There was a bug in the function dispatcher and the fallback algorithm was really the AVX implementation. That's
## why the timings are so close between the AVX and fallback. Bug was fixed in https://github.com/manodeep/Corrfunc/commit/2ed855805414bc10300858499806b7bc07aa773d
## Npts kdcount cKDTree halotools Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback)
10000 0.050 0.153 0.030 0.223 0.031 0.030
50000 0.451 0.696 0.123 0.101 0.105 0.109
100000 1.265 1.925 0.359 0.219 0.249 0.281
500000 16.541 23.059 6.735 2.687 3.583 4.990
1000000 51.864 68.770 25.743 9.302 13.992 18.639
## Version with dispatch bug.
## Npts kdcount cKDTree KDTree halotools Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback)
10000 0.052 0.157 0.149 0.028 0.230 0.031 0.027
50000 0.443 0.700 0.717 0.118 0.095 0.104 0.092
100000 1.292 1.898 1.889 0.344 0.202 0.240 0.200
500000 16.257 21.738 21.595 6.374 2.465 3.459 2.482
1000000 48.638 66.999 70.141 26.415 9.326 13.539 9.860
## Previous version -> done at Salt Lake City, UT (Galaxy-Halo Conference, Mar 2016)
## Npts kdcount cKDTree KDTree halotools Corrfunc(AVX) Corrfunc(SSE) Corrfunc(Naive)
80000 0.947 1.518 1.569 2.109 0.183 0.194 0.210
100000 1.324 1.990 2.012 2.821 0.240 0.267 0.298
500000 16.790 23.291 23.151 51.567 2.872 3.902 5.146
1000000 52.749 71.471 71.354 203.456 10.165 14.473 19.607
from __future__ import print_function
import numpy as np
try:
import kdcount
except:
kdcount = None
try:
from scipy.spatial import cKDTree
except ImportError:
cKDTree = None
from halotools.mock_observables.pair_counters import npairs_3d \
as halotools_npairs
from Corrfunc.theory.DD import DD
import time
try:
from sklearn.neighbors import KDTree
except ImportError:
KDTree = None
def kdcount_timing(data1, data2, rbins, period):
tree1 = kdcount.KDTree(data1).root
tree2 = kdcount.KDTree(data2).root
return tree1.count(tree2, r=rbins)
def halotools_timing(data1, data2, rbins, period):
return halotools_npairs(data1, data2, rbins)
def cKDTree_timing(data1, data2, rbins, period):
tree1 = cKDTree(data1)
tree2 = cKDTree(data2)
return tree1.count_neighbors(tree2, rbins)
def KDTree_timing(data1, data2, rbins, period):
tree1 = KDTree(data1)
return tree1.two_point_correlation(data2, rbins)
def corrfunc_timing(data1, data2, rbins, period):
x1 = data1[:, 0]
y1 = data1[:, 1]
z1 = data1[:, 2]
x2 = data2[:, 0]
y2 = data2[:, 1]
z2 = data2[:, 2]
nthreads = 1
autocorr = 0
return DD(autocorr, nthreads, rbins,
x1, y1, z1,
X2=x2, Y2=y2, Z2=z2, isa='avx',
periodic=False)
def corrfunc_timing_sse(data1, data2, rbins, period):
x1 = data1[:, 0]
y1 = data1[:, 1]
z1 = data1[:, 2]
x2 = data2[:, 0]
y2 = data2[:, 1]
z2 = data2[:, 2]
nthreads = 1
autocorr = 0
return DD(autocorr, nthreads, rbins,
x1, y1, z1,
X2=x2, Y2=y2, Z2=z2, isa='sse42',
periodic=False)
def corrfunc_timing_fallback(data1, data2, rbins, period):
x1 = data1[:, 0]
y1 = data1[:, 1]
z1 = data1[:, 2]
x2 = data2[:, 0]
y2 = data2[:, 1]
z2 = data2[:, 2]
nthreads = 1
autocorr = 0
return DD(autocorr, nthreads, rbins,
x1, y1, z1,
X2=x2, Y2=y2, Z2=z2, isa='fallback',
periodic=False)
def main():
functions_to_test = []
function_names = []
if kdcount is not None:
functions_to_test.append(kdcount_timing)
function_names.append("kdcount")
if cKDTree is not None:
functions_to_test.append(cKDTree_timing)
function_names.append("cKDTree")
if KDTree is not None:
functions_to_test.append(KDTree_timing)
function_names.append("KDTree")
functions_to_test.append(halotools_timing)
function_names.append("halotools")
functions_to_test.append(corrfunc_timing)
function_names.append("Corrfunc(AVX)")
functions_to_test.append(corrfunc_timing_sse)
function_names.append("Corrfunc(SSE42)")
functions_to_test.append(corrfunc_timing_fallback)
function_names.append("Corrfunc(fallback)")
clustered_data = 0
if clustered_data:
from Corrfunc.io import read_catalog
x, y, z = read_catalog()
boxsize = 420.0
x /= boxsize
y /= boxsize
z /= boxsize
npts = [1e4, 5e4, 1e5, 5e5, 1e6]
npts = [int(i) for i in npts]
print("## Npts ", end="")
for ifunc in function_names:
print(" {0:10s}".format(ifunc), end="")
print("")
for n in npts:
npts1 = n
npts2 = n
if clustered_data == 0:
data1 = np.random.random(npts1*3).reshape(npts1, 3)
data2 = np.random.random(npts2*3).reshape(npts2, 3)
else:
newx1 = np.random.choice(x, npts1, replace=False)
newy1 = np.random.choice(y, npts1, replace=False)
newz1 = np.random.choice(z, npts1, replace=False)
data1 = np.array([newx1, newy1, newz1]).transpose()
newx2 = np.random.choice(x, npts2, replace=False)
newy2 = np.random.choice(y, npts2, replace=False)
newz2 = np.random.choice(z, npts2, replace=False)
data2 = np.array([newx2, newy2, newz2]).transpose()
boxsize = 1.0
rbins = np.linspace(0.001*boxsize, 0.05*boxsize, 10)
if data1.max() > boxsize or data2.max() > boxsize:
msg = "Max data1 = {0} or max data2 = {1} must be less than "\
"boxsize = {2}".format(data1.max(), data2.max(), boxsize)
raise ValueError(msg)
print("{0:7d}".format(n), end="")
for ii, func_name in enumerate(function_names):
t0 = time.time()
functions_to_test[ii](data1, data2, rbins, boxsize)
t1 = time.time()
print(" {0:7.3f} ".format(t1-t0), end="")
print("")
if __name__ == '__main__':
main()
@manodeep
Copy link
Author

Requires halotools, kdcount, scipy and my code Corrfunc. All calculations are in double precision. Wrote down the AVX numbers by hand.

@manodeep
Copy link
Author

@aphearin @rainwoodman -- here's the python code I used to benchmark.

@mehdirezaie
Copy link

The function defined at line 42 seems to use cKDTree instead of KDTree.

@manodeep
Copy link
Author

Yup. I removed KDTree completely from the benchmark - the timings were virtually identical between kdtree and ckdtree.

Ohh I just realized the bug, I have ckdtree rather than kdtree in the kdtree function.

@rainwoodman
Copy link

Adding TreeCorr to this?

@rainwoodman
Copy link

rainwoodman commented Jul 26, 2018

I added TreeCorr. TreeCorr only do log bins, so I converted the bins to logscale for a fair comparison.

from __future__ import print_function
import numpy as np
try:
    import kdcount
except:
    kdcount = None

try:
    import treecorr
except:
    treecorr = None


try:
    from scipy.spatial import cKDTree
except ImportError:
    cKDTree = None

from halotools.mock_observables.pair_counters import npairs_3d \
    as halotools_npairs

from Corrfunc.theory.DD import DD
import time

try:
    from sklearn.neighbors import KDTree
except ImportError:
    KDTree = None


def kdcount_timing(data1, data2, rbins, period):
    tree1 = kdcount.KDTree(data1).root
    tree2 = kdcount.KDTree(data2).root
    return tree1.count(tree2, r=rbins)


def halotools_timing(data1, data2, rbins, period):
    return halotools_npairs(data1, data2, rbins)

def treecorr_timing(data1, data2, rbins, period):
    cat1 = treecorr.Catalog(x=data1[:, 0], y=data1[:, 1], z=data1[:, 2])
    cat2 = treecorr.Catalog(x=data2[:, 0], y=data2[:, 1], z=data2[:, 2])
    nn = treecorr.NNCorrelation(nbins=len(rbins), min_sep=rbins[0], max_sep=rbins[-1])
    return nn.process(cat1, cat2, num_threads=1)

def cKDTree_timing(data1, data2, rbins, period):
    tree1 = cKDTree(data1)
    tree2 = cKDTree(data2)
    return tree1.count_neighbors(tree2, rbins)


def KDTree_timing(data1, data2, rbins, period):
    tree1 = KDTree(data1)
    return tree1.two_point_correlation(data2, rbins)


def corrfunc_timing(data1, data2, rbins, period):
    x1 = data1[:, 0]
    y1 = data1[:, 1]
    z1 = data1[:, 2]

    x2 = data2[:, 0]
    y2 = data2[:, 1]
    z2 = data2[:, 2]
    nthreads = 1
    autocorr = 0
    return DD(autocorr, nthreads, rbins,
              x1, y1, z1,
              X2=x2, Y2=y2, Z2=z2, isa='avx',
              periodic=False)


def corrfunc_timing_sse(data1, data2, rbins, period):
    x1 = data1[:, 0]
    y1 = data1[:, 1]
    z1 = data1[:, 2]

    x2 = data2[:, 0]
    y2 = data2[:, 1]
    z2 = data2[:, 2]
    nthreads = 1
    autocorr = 0
    return DD(autocorr, nthreads, rbins,
              x1, y1, z1,
              X2=x2, Y2=y2, Z2=z2, isa='sse42',
              periodic=False)


def corrfunc_timing_fallback(data1, data2, rbins, period):
    x1 = data1[:, 0]
    y1 = data1[:, 1]
    z1 = data1[:, 2]

    x2 = data2[:, 0]
    y2 = data2[:, 1]
    z2 = data2[:, 2]
    nthreads = 1
    autocorr = 0
    return DD(autocorr, nthreads, rbins,
              x1, y1, z1,
              X2=x2, Y2=y2, Z2=z2, isa='fallback',
              periodic=False)


def main():
    functions_to_test = []
    function_names = []
    if kdcount is not None:
        functions_to_test.append(kdcount_timing)
        function_names.append("kdcount")

    if cKDTree is not None:
        functions_to_test.append(cKDTree_timing)
        function_names.append("cKDTree")

    if KDTree is not None:
        functions_to_test.append(KDTree_timing)
        function_names.append("KDTree")

    if treecorr is not None:
        functions_to_test.append(treecorr_timing)
        function_names.append("TreeCorr")
        
    functions_to_test.append(halotools_timing)
    function_names.append("halotools")
    
    functions_to_test.append(corrfunc_timing)
    function_names.append("Corrfunc(AVX)")

    functions_to_test.append(corrfunc_timing_sse)
    function_names.append("Corrfunc(SSE42)")
    
    functions_to_test.append(corrfunc_timing_fallback)
    function_names.append("Corrfunc(fallback)")

    clustered_data = 0
    if clustered_data:
        from Corrfunc.io import read_catalog
        x, y, z = read_catalog()
        boxsize = 420.0
        x /= boxsize
        y /= boxsize
        z /= boxsize

    npts = [1e4, 5e4, 1e5, 5e5, 1e6]
    npts = [int(i) for i in npts]
    print("## Npts ", end="")
    for ifunc in function_names:
        print(" {0:10s}".format(ifunc), end="")

    print("")

    for n in npts:
        npts1 = n
        npts2 = n
        if clustered_data == 0:
            data1 = np.random.random(npts1*3).reshape(npts1, 3)
            data2 = np.random.random(npts2*3).reshape(npts2, 3)
        else:
            newx1 = np.random.choice(x, npts1, replace=False)
            newy1 = np.random.choice(y, npts1, replace=False)
            newz1 = np.random.choice(z, npts1, replace=False)
            
            data1 = np.array([newx1, newy1, newz1]).transpose()
            newx2 = np.random.choice(x, npts2, replace=False)
            newy2 = np.random.choice(y, npts2, replace=False)
            newz2 = np.random.choice(z, npts2, replace=False)
            data2 = np.array([newx2, newy2, newz2]).transpose()

        boxsize = 1.0
        rbins = np.logspace(-3, np.log10(0.05), 10) * boxsize
        if data1.max() > boxsize or data2.max() > boxsize:
            msg = "Max data1 = {0} or max data2 = {1} must be less than "\
                  "boxsize = {2}".format(data1.max(), data2.max(), boxsize)
            raise ValueError(msg)
            
        print("{0:7d}".format(n), end="")
        for ii, func_name in enumerate(function_names):
            t0 = time.time()
            functions_to_test[ii](data1, data2, rbins, boxsize)
            t1 = time.time()
            print(" {0:7.3f}   ".format(t1-t0), end="")

        print("")

if __name__ == '__main__':
    main()

@rainwoodman
Copy link

Benchmark on my computer for reference.

## Npts  kdcount    cKDTree    KDTree     TreeCorr   halotools  Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback)
  10000   0.030      0.036      0.056      0.040      0.045      0.036      0.033      0.028   
  50000   0.254      0.257      0.591      0.266      0.113      0.193      0.178      0.155   
 100000   0.754      0.700      1.536      0.668      0.288      0.340      0.324      0.315   
 500000   9.333      7.780     26.734      9.304      5.089      2.069      2.234      2.733   
1000000  27.889     23.492     89.310     29.456     18.926      5.362      6.534      8.848   

@manodeep
Copy link
Author

@rainwoodman - thanks for the new timings! I did not notice your comments until now.

I need to run a new timing test, including the new Corrfunc AVX512F kernels as well.

@manodeep
Copy link
Author

Here is the code I used:

from __future__ import print_function
import numpy as np
try:
    import kdcount
except:
    kdcount = None

try:
    import treecorr
except:
    treecorr = None


try:
    from scipy.spatial import cKDTree
except ImportError:
    cKDTree = None

from halotools.mock_observables.pair_counters import npairs_3d \
    as halotools_npairs

from Corrfunc.theory.DD import DD
import time

try:
    from sklearn.neighbors import KDTree
except ImportError:
    KDTree = None


def kdcount_timing(data1, data2, rbins, period):
    tree1 = kdcount.KDTree(data1).root
    tree2 = kdcount.KDTree(data2).root
    return tree1.count(tree2, r=rbins)


def halotools_timing(data1, data2, rbins, period):
    return halotools_npairs(data1, data2, rbins)

def treecorr_timing(data1, data2, rbins, period):
    cat1 = treecorr.Catalog(x=data1[:, 0], y=data1[:, 1], z=data1[:, 2])
    cat2 = treecorr.Catalog(x=data2[:, 0], y=data2[:, 1], z=data2[:, 2])
    nn = treecorr.NNCorrelation(nbins=len(rbins), min_sep=rbins[0], max_sep=rbins[-1])
    return nn.process(cat1, cat2, num_threads=1)

def cKDTree_timing(data1, data2, rbins, period):
    tree1 = cKDTree(data1)
    tree2 = cKDTree(data2)
    return tree1.count_neighbors(tree2, rbins)
    

def KDTree_timing(data1, data2, rbins, period):
    tree1 = KDTree(data1)
    return tree1.two_point_correlation(data2, rbins)

def corrfunc_timing(data1, data2, rbins, period):
    x1 = data1[:, 0]
    y1 = data1[:, 1]
    z1 = data1[:, 2]

    x2 = data2[:, 0]
    y2 = data2[:, 1]
    z2 = data2[:, 2]
    nthreads = 1
    autocorr = 0
    return DD(autocorr, nthreads, rbins,
              x1, y1, z1,
              X2=x2, Y2=y2, Z2=z2, isa='avx512f',
              periodic=False)
    
def corrfunc_timing_avx(data1, data2, rbins, period):
    x1 = data1[:, 0]
    y1 = data1[:, 1]
    z1 = data1[:, 2]

    x2 = data2[:, 0]
    y2 = data2[:, 1]
    z2 = data2[:, 2]
    nthreads = 1
    autocorr = 0
    return DD(autocorr, nthreads, rbins,
              x1, y1, z1,
              X2=x2, Y2=y2, Z2=z2, isa='avx',
              periodic=False)
    

def corrfunc_timing_sse(data1, data2, rbins, period):
    x1 = data1[:, 0]
    y1 = data1[:, 1]
    z1 = data1[:, 2]

    x2 = data2[:, 0]
    y2 = data2[:, 1]
    z2 = data2[:, 2]
    nthreads = 1
    autocorr = 0
    return DD(autocorr, nthreads, rbins,
              x1, y1, z1,
              X2=x2, Y2=y2, Z2=z2, isa='sse42',
              periodic=False)


def corrfunc_timing_fallback(data1, data2, rbins, period):
    x1 = data1[:, 0]
    y1 = data1[:, 1]
    z1 = data1[:, 2]
    
    x2 = data2[:, 0]
    y2 = data2[:, 1]
    z2 = data2[:, 2]
    nthreads = 1
    autocorr = 0
    return DD(autocorr, nthreads, rbins,
            x1, y1, z1,
              X2=x2, Y2=y2, Z2=z2, isa='fallback',
              periodic=False)
    
    
def main():
    functions_to_test = []
    function_names = []
    function_versions = []
    if kdcount is not None:
        functions_to_test.append(kdcount_timing)
        function_names.append("kdcount")
        function_versions.append(kdcount.__version__)
        
    if cKDTree is not None:
        functions_to_test.append(cKDTree_timing)
        function_names.append("cKDTree")
        from scipy import __version__ as scipy_ver
        function_versions.append(scipy_ver)
        
    if KDTree is not None:
        functions_to_test.append(KDTree_timing)
        from sklearn import __version__ as sklearn_ver
        function_names.append("KDTree")
        function_versions.append(sklearn_ver)

    if treecorr is not None:
        functions_to_test.append(treecorr_timing)
        function_names.append("TreeCorr")
        function_versions.append(treecorr.__version__)

    import halotools
    functions_to_test.append(halotools_timing)
    function_names.append("halotools")
    function_versions.append(halotools.__version__)

    import Corrfunc as corrfunc
    functions_to_test.append(corrfunc_timing)
    function_names.append("Corrfunc(AVX512f)")
    function_versions.append(corrfunc.__version__)

    functions_to_test.append(corrfunc_timing_avx)
    function_names.append("Corrfunc(AVX)")
    function_versions.append(corrfunc.__version__)
    
    functions_to_test.append(corrfunc_timing_sse)
    function_names.append("Corrfunc(SSE42)")
    function_versions.append(corrfunc.__version__)
    
    functions_to_test.append(corrfunc_timing_fallback)
    function_names.append("Corrfunc(fallback)")
    function_versions.append(corrfunc.__version__)
    for fn, fn_version in zip(function_names, function_versions):
        print("## {} version = {}".format(fn, fn_version))

    for clustered_data in [0, 1]:
        if clustered_data:
            from Corrfunc.io import read_catalog
            x, y, z = read_catalog()
            boxsize = 420.0
            x /= boxsize
            y /= boxsize
            z /= boxsize
        
        npts = [1e4, 5e4, 1e5, 5e5, 1e6]
        npts = [int(i) for i in npts]
        boxsize = 1.0
        rbins = np.logspace(-3, np.log10(0.05), 10) * boxsize

        print("## boxsize = {}. Bins are :".format(boxsize))
        for ll, uu in zip(rbins[0:-2], rbins[1:]):
            print("## {:0.5e} {:0.5e} ".format(ll, uu))
            

        print("## clustered data = {}".format(clustered_data))
        print("## Npts          ", end="")
        for ifunc in function_names:
            print(" {:14s} ".format(ifunc), end="")
                
        print("")
        
        for n in npts:
            npts1 = n
            npts2 = n
            if clustered_data == 0:
                data1 = np.random.random(npts1*3).reshape(npts1, 3)
                data2 = np.random.random(npts2*3).reshape(npts2, 3)
            else:
                newx1 = np.random.choice(x, npts1, replace=False)
                newy1 = np.random.choice(y, npts1, replace=False)
                newz1 = np.random.choice(z, npts1, replace=False)
                data1 = np.array([newx1, newy1, newz1]).transpose()

                newx2 = np.random.choice(x, npts2, replace=False)
                newy2 = np.random.choice(y, npts2, replace=False)
                newz2 = np.random.choice(z, npts2, replace=False)
                data2 = np.array([newx2, newy2, newz2]).transpose()

            if data1.max() > boxsize or data2.max() > boxsize:
                msg = "Max data1 = {0} or max data2 = {1} must be less than "\
                      "boxsize = {2}".format(data1.max(), data2.max(), boxsize)
                raise ValueError(msg)

            print("{0:7d}".format(n), end="")
            for ii, func_name in enumerate(function_names):
                t0 = time.time()
                functions_to_test[ii](data1, data2, rbins, boxsize)
                t1 = time.time()
                print(" {0:14.3f}  ".format(t1-t0), end="", flush=True)

            print("")
                    
if __name__ == '__main__':
    main()
## kdcount version = 0.3.29
## cKDTree version = 1.2.0
## TreeCorr version = 3.3.11
## halotools version = 0.6
## Corrfunc(AVX512f) version = 2.3.0
## Corrfunc(AVX) version = 2.3.0
## Corrfunc(SSE42) version = 2.3.0
## Corrfunc(fallback) version = 2.3.0
## boxsize = 1.0. Bins are :
## 1.00000e-03 1.54445e-03 
## 1.54445e-03 2.38533e-03 
## 2.38533e-03 3.68403e-03 
## 3.68403e-03 5.68981e-03 
## 5.68981e-03 8.78764e-03 
## 8.78764e-03 1.35721e-02 
## 1.35721e-02 2.09614e-02 
## 2.09614e-02 3.23739e-02
## 3.23739e-02 5.00000e-02
## clustered data = 0
## Npts           kdcount         cKDTree         TreeCorr        halotools       Corrfunc(AVX512f)  Corrfunc(AVX)   Corrfunc(SSE42)  Corrfunc(fallback) 
  10000          0.035            0.034            0.032            0.025            0.071            0.060            0.056            0.056  
  50000          0.304            0.262            0.251            0.100            0.216            0.225            0.221            0.221  
 100000          0.865            0.745            0.721            0.273            0.378            0.410            0.411            0.425  
 500000         10.709            9.459           10.123            4.541            1.644            2.003            2.410            2.686  
1000000         33.051           29.545           33.107           17.109            3.768            5.093            6.613            7.946  
## boxsize = 1.0. Bins are :
## 1.00000e-03 1.54445e-03 
## 1.54445e-03 2.38533e-03 
## 2.38533e-03 3.68403e-03 
## 3.68403e-03 5.68981e-03 
## 5.68981e-03 8.78764e-03 
## 8.78764e-03 1.35721e-02 
## 1.35721e-02 2.09614e-02 
## 2.09614e-02 3.23739e-02 
## 3.23739e-02 5.00000e-02
## clustered data = 1
## Npts           kdcount         cKDTree         TreeCorr        halotools       Corrfunc(AVX512f)  Corrfunc(AVX)   Corrfunc(SSE42)  Corrfunc(fallback) 
  10000          0.034            0.034            0.031            0.025            0.043            0.041            0.041            0.041  
  50000          0.307            0.260            0.239            0.101            0.169            0.176            0.172            0.173  
 100000          0.869            0.741            0.695            0.270            0.322            0.359            0.353            0.368  
 500000         10.799            9.451            9.990            4.549            1.600            1.989            2.446            2.691  
1000000         33.221           29.097           32.744           17.303            3.839            5.111            6.686            8.074  

Looks like no difference between clustered vs random data.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment