manodeep/Latest timings

## Latest timings
## Timings with halotools v0.4 and Corrfunc v2.0.0. Updated on Sep 12 (random data, Corrfunc compiled with gcc6.0)
## Note this test is capped at 1 million because the first three codes take too long and halotools randomly subsamples
## particles above 1 million. The algorithm in halotools is an *exact* copy of `Corrfunc` - so the timing comparison simply
## tests the capability of the cython compiler.

## Timings generated on Oct 17, 2016, with mostly ready version of Corrfunc v2.0. Non-periodic calculations with random points.

## Npts  kdcount    cKDTree    KDTree     halotools  Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback)
  10000   0.039      0.057      0.087      0.026      0.240      0.053      0.052
  50000   0.381      0.399      0.936      0.121      0.226      0.224      0.214
 100000   1.080      1.104      2.757      0.383      0.390      0.386      0.414
 500000  14.308     14.114     44.586      7.828      2.689      3.252      4.319
1000000  46.914     42.952    147.532     28.983      7.601     10.433     13.725


## There was a bug in the function dispatcher and the fallback algorithm was really the AVX implementation. That's
## why the timings are so close between the AVX and fallback. Bug was fixed in https://github.com/manodeep/Corrfunc/commit/2ed855805414bc10300858499806b7bc07aa773d

## Npts  kdcount    cKDTree    halotools  Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback)
  10000   0.050      0.153      0.030      0.223      0.031      0.030
  50000   0.451      0.696      0.123      0.101      0.105      0.109
 100000   1.265      1.925      0.359      0.219      0.249      0.281
 500000  16.541     23.059      6.735      2.687      3.583      4.990
1000000  51.864     68.770     25.743      9.302     13.992     18.639

## Version with dispatch bug.

##    Npts  kdcount    cKDTree    KDTree     halotools  Corrfunc(AVX)   Corrfunc(SSE42) Corrfunc(fallback)
     10000   0.052      0.157      0.149      0.028      0.230            0.031              0.027
     50000   0.443      0.700      0.717      0.118      0.095            0.104              0.092
    100000   1.292      1.898      1.889      0.344      0.202            0.240              0.200
    500000  16.257     21.738     21.595      6.374      2.465            3.459              2.482
   1000000  48.638     66.999     70.141     26.415      9.326           13.539              9.860

## Previous version -> done at Salt Lake City, UT (Galaxy-Halo Conference, Mar 2016)
## Npts  kdcount    cKDTree    KDTree     halotools  Corrfunc(AVX)  Corrfunc(SSE)  Corrfunc(Naive)
  80000   0.947      1.518      1.569      2.109      0.183          0.194          0.210
 100000   1.324      1.990      2.012      2.821      0.240          0.267          0.298
 500000  16.790     23.291     23.151     51.567      2.872          3.902          5.146
1000000  52.749     71.471     71.354    203.456     10.165         14.473         19.607


## timing_tests.py
from __future__ import print_function
import numpy as np
try:
    import kdcount
except:
    kdcount = None

try:
    from scipy.spatial import cKDTree
except ImportError:
    cKDTree = None

from halotools.mock_observables.pair_counters import npairs_3d \
    as halotools_npairs

from Corrfunc.theory.DD import DD
import time

try:
    from sklearn.neighbors import KDTree
except ImportError:
    KDTree = None


def kdcount_timing(data1, data2, rbins, period):
    tree1 = kdcount.KDTree(data1).root
    tree2 = kdcount.KDTree(data2).root
    return tree1.count(tree2, r=rbins)


def halotools_timing(data1, data2, rbins, period):
    return halotools_npairs(data1, data2, rbins)


def cKDTree_timing(data1, data2, rbins, period):
    tree1 = cKDTree(data1)
    tree2 = cKDTree(data2)
    return tree1.count_neighbors(tree2, rbins)


def KDTree_timing(data1, data2, rbins, period):
    tree1 = KDTree(data1)
    return tree1.two_point_correlation(data2, rbins)


def corrfunc_timing(data1, data2, rbins, period):
    x1 = data1[:, 0]
    y1 = data1[:, 1]
    z1 = data1[:, 2]

    x2 = data2[:, 0]
    y2 = data2[:, 1]
    z2 = data2[:, 2]
    nthreads = 1
    autocorr = 0
    return DD(autocorr, nthreads, rbins,
              x1, y1, z1,
              X2=x2, Y2=y2, Z2=z2, isa='avx',
              periodic=False)


def corrfunc_timing_sse(data1, data2, rbins, period):
    x1 = data1[:, 0]
    y1 = data1[:, 1]
    z1 = data1[:, 2]

    x2 = data2[:, 0]
    y2 = data2[:, 1]
    z2 = data2[:, 2]
    nthreads = 1
    autocorr = 0
    return DD(autocorr, nthreads, rbins,
              x1, y1, z1,
              X2=x2, Y2=y2, Z2=z2, isa='sse42',
              periodic=False)


def corrfunc_timing_fallback(data1, data2, rbins, period):
    x1 = data1[:, 0]
    y1 = data1[:, 1]
    z1 = data1[:, 2]

    x2 = data2[:, 0]
    y2 = data2[:, 1]
    z2 = data2[:, 2]
    nthreads = 1
    autocorr = 0
    return DD(autocorr, nthreads, rbins,
              x1, y1, z1,
              X2=x2, Y2=y2, Z2=z2, isa='fallback',
              periodic=False)


def main():
    functions_to_test = []
    function_names = []
    if kdcount is not None:
        functions_to_test.append(kdcount_timing)
        function_names.append("kdcount")

    if cKDTree is not None:
        functions_to_test.append(cKDTree_timing)
        function_names.append("cKDTree")

    if KDTree is not None:
        functions_to_test.append(KDTree_timing)
        function_names.append("KDTree")

    functions_to_test.append(halotools_timing)
    function_names.append("halotools")

    functions_to_test.append(corrfunc_timing)
    function_names.append("Corrfunc(AVX)")

    functions_to_test.append(corrfunc_timing_sse)
    function_names.append("Corrfunc(SSE42)")

    functions_to_test.append(corrfunc_timing_fallback)
    function_names.append("Corrfunc(fallback)")

    clustered_data = 0
    if clustered_data:
        from Corrfunc.io import read_catalog
        x, y, z = read_catalog()
        boxsize = 420.0
        x /= boxsize
        y /= boxsize
        z /= boxsize

    npts = [1e4, 5e4, 1e5, 5e5, 1e6]
    npts = [int(i) for i in npts]
    print("## Npts ", end="")
    for ifunc in function_names:
        print(" {0:10s}".format(ifunc), end="")

    print("")

    for n in npts:
        npts1 = n
        npts2 = n
        if clustered_data == 0:
            data1 = np.random.random(npts1*3).reshape(npts1, 3)
            data2 = np.random.random(npts2*3).reshape(npts2, 3)
        else:
            newx1 = np.random.choice(x, npts1, replace=False)
            newy1 = np.random.choice(y, npts1, replace=False)
            newz1 = np.random.choice(z, npts1, replace=False)

            data1 = np.array([newx1, newy1, newz1]).transpose()
            newx2 = np.random.choice(x, npts2, replace=False)
            newy2 = np.random.choice(y, npts2, replace=False)
            newz2 = np.random.choice(z, npts2, replace=False)
            data2 = np.array([newx2, newy2, newz2]).transpose()

        boxsize = 1.0
        rbins = np.linspace(0.001*boxsize, 0.05*boxsize, 10)
        if data1.max() > boxsize or data2.max() > boxsize:
            msg = "Max data1 = {0} or max data2 = {1} must be less than "\
                  "boxsize = {2}".format(data1.max(), data2.max(), boxsize)
            raise ValueError(msg)

        print("{0:7d}".format(n), end="")
        for ii, func_name in enumerate(function_names):
            t0 = time.time()
            functions_to_test[ii](data1, data2, rbins, boxsize)
            t1 = time.time()
            print(" {0:7.3f}   ".format(t1-t0), end="")

        print("")

if __name__ == '__main__':
    main()
	## Timings with halotools v0.4 and Corrfunc v2.0.0. Updated on Sep 12 (random data, Corrfunc compiled with gcc6.0)
	## Note this test is capped at 1 million because the first three codes take too long and halotools randomly subsamples
	## particles above 1 million. The algorithm in halotools is an exact copy of `Corrfunc` - so the timing comparison simply
	## tests the capability of the cython compiler.

	## Timings generated on Oct 17, 2016, with mostly ready version of Corrfunc v2.0. Non-periodic calculations with random points.

	## Npts kdcount cKDTree KDTree halotools Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback)
	10000 0.039 0.057 0.087 0.026 0.240 0.053 0.052
	50000 0.381 0.399 0.936 0.121 0.226 0.224 0.214
	100000 1.080 1.104 2.757 0.383 0.390 0.386 0.414
	500000 14.308 14.114 44.586 7.828 2.689 3.252 4.319
	1000000 46.914 42.952 147.532 28.983 7.601 10.433 13.725


	## There was a bug in the function dispatcher and the fallback algorithm was really the AVX implementation. That's
	## why the timings are so close between the AVX and fallback. Bug was fixed in https://github.com/manodeep/Corrfunc/commit/2ed855805414bc10300858499806b7bc07aa773d

	## Npts kdcount cKDTree halotools Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback)
	10000 0.050 0.153 0.030 0.223 0.031 0.030
	50000 0.451 0.696 0.123 0.101 0.105 0.109
	100000 1.265 1.925 0.359 0.219 0.249 0.281
	500000 16.541 23.059 6.735 2.687 3.583 4.990
	1000000 51.864 68.770 25.743 9.302 13.992 18.639

	## Version with dispatch bug.

	## Npts kdcount cKDTree KDTree halotools Corrfunc(AVX) Corrfunc(SSE42) Corrfunc(fallback)
	10000 0.052 0.157 0.149 0.028 0.230 0.031 0.027
	50000 0.443 0.700 0.717 0.118 0.095 0.104 0.092
	100000 1.292 1.898 1.889 0.344 0.202 0.240 0.200
	500000 16.257 21.738 21.595 6.374 2.465 3.459 2.482
	1000000 48.638 66.999 70.141 26.415 9.326 13.539 9.860

	## Previous version -> done at Salt Lake City, UT (Galaxy-Halo Conference, Mar 2016)
	## Npts kdcount cKDTree KDTree halotools Corrfunc(AVX) Corrfunc(SSE) Corrfunc(Naive)
	80000 0.947 1.518 1.569 2.109 0.183 0.194 0.210
	100000 1.324 1.990 2.012 2.821 0.240 0.267 0.298
	500000 16.790 23.291 23.151 51.567 2.872 3.902 5.146
	1000000 52.749 71.471 71.354 203.456 10.165 14.473 19.607
	from __future__ import print_function
	import numpy as np
	try:
	import kdcount
	except:
	kdcount = None

	try:
	from scipy.spatial import cKDTree
	except ImportError:
	cKDTree = None

	from halotools.mock_observables.pair_counters import npairs_3d \
	as halotools_npairs

	from Corrfunc.theory.DD import DD
	import time

	try:
	from sklearn.neighbors import KDTree
	except ImportError:
	KDTree = None


	def kdcount_timing(data1, data2, rbins, period):
	tree1 = kdcount.KDTree(data1).root
	tree2 = kdcount.KDTree(data2).root
	return tree1.count(tree2, r=rbins)


	def halotools_timing(data1, data2, rbins, period):
	return halotools_npairs(data1, data2, rbins)


	def cKDTree_timing(data1, data2, rbins, period):
	tree1 = cKDTree(data1)
	tree2 = cKDTree(data2)
	return tree1.count_neighbors(tree2, rbins)


	def KDTree_timing(data1, data2, rbins, period):
	tree1 = KDTree(data1)
	return tree1.two_point_correlation(data2, rbins)


	def corrfunc_timing(data1, data2, rbins, period):
	x1 = data1[:, 0]
	y1 = data1[:, 1]
	z1 = data1[:, 2]

	x2 = data2[:, 0]
	y2 = data2[:, 1]
	z2 = data2[:, 2]
	nthreads = 1
	autocorr = 0
	return DD(autocorr, nthreads, rbins,
	x1, y1, z1,
	X2=x2, Y2=y2, Z2=z2, isa='avx',
	periodic=False)


	def corrfunc_timing_sse(data1, data2, rbins, period):
	x1 = data1[:, 0]
	y1 = data1[:, 1]
	z1 = data1[:, 2]

	x2 = data2[:, 0]
	y2 = data2[:, 1]
	z2 = data2[:, 2]
	nthreads = 1
	autocorr = 0
	return DD(autocorr, nthreads, rbins,
	x1, y1, z1,
	X2=x2, Y2=y2, Z2=z2, isa='sse42',
	periodic=False)


	def corrfunc_timing_fallback(data1, data2, rbins, period):
	x1 = data1[:, 0]
	y1 = data1[:, 1]
	z1 = data1[:, 2]

	x2 = data2[:, 0]
	y2 = data2[:, 1]
	z2 = data2[:, 2]
	nthreads = 1
	autocorr = 0
	return DD(autocorr, nthreads, rbins,
	x1, y1, z1,
	X2=x2, Y2=y2, Z2=z2, isa='fallback',
	periodic=False)


	def main():
	functions_to_test = []
	function_names = []
	if kdcount is not None:
	functions_to_test.append(kdcount_timing)
	function_names.append("kdcount")

	if cKDTree is not None:
	functions_to_test.append(cKDTree_timing)
	function_names.append("cKDTree")

	if KDTree is not None:
	functions_to_test.append(KDTree_timing)
	function_names.append("KDTree")

	functions_to_test.append(halotools_timing)
	function_names.append("halotools")

	functions_to_test.append(corrfunc_timing)
	function_names.append("Corrfunc(AVX)")

	functions_to_test.append(corrfunc_timing_sse)
	function_names.append("Corrfunc(SSE42)")

	functions_to_test.append(corrfunc_timing_fallback)
	function_names.append("Corrfunc(fallback)")

	clustered_data = 0
	if clustered_data:
	from Corrfunc.io import read_catalog
	x, y, z = read_catalog()
	boxsize = 420.0
	x /= boxsize
	y /= boxsize
	z /= boxsize

	npts = [1e4, 5e4, 1e5, 5e5, 1e6]
	npts = [int(i) for i in npts]
	print("## Npts ", end="")
	for ifunc in function_names:
	print(" {0:10s}".format(ifunc), end="")

	print("")

	for n in npts:
	npts1 = n
	npts2 = n
	if clustered_data == 0:
	data1 = np.random.random(npts1*3).reshape(npts1, 3)
	data2 = np.random.random(npts2*3).reshape(npts2, 3)
	else:
	newx1 = np.random.choice(x, npts1, replace=False)
	newy1 = np.random.choice(y, npts1, replace=False)
	newz1 = np.random.choice(z, npts1, replace=False)

	data1 = np.array([newx1, newy1, newz1]).transpose()
	newx2 = np.random.choice(x, npts2, replace=False)
	newy2 = np.random.choice(y, npts2, replace=False)
	newz2 = np.random.choice(z, npts2, replace=False)
	data2 = np.array([newx2, newy2, newz2]).transpose()

	boxsize = 1.0
	rbins = np.linspace(0.001boxsize, 0.05boxsize, 10)
	if data1.max() > boxsize or data2.max() > boxsize:
	msg = "Max data1 = {0} or max data2 = {1} must be less than "\
	"boxsize = {2}".format(data1.max(), data2.max(), boxsize)
	raise ValueError(msg)

	print("{0:7d}".format(n), end="")
	for ii, func_name in enumerate(function_names):
	t0 = time.time()
	functions_to_test[ii](data1, data2, rbins, boxsize)
	t1 = time.time()
	print(" {0:7.3f} ".format(t1-t0), end="")

	print("")

	if __name__ == '__main__':
	main()