Skip to content

Instantly share code, notes, and snippets.

@jjerphan
Last active August 13, 2021 15:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jjerphan/6c1f3e4c80908b862ccce6682835d36a to your computer and use it in GitHub Desktop.
Save jjerphan/6c1f3e4c80908b862ccce6682835d36a to your computer and use it in GitHub Desktop.
scikit-learn Binary Tree benchmark script -- used in scikit-learn/scikit-learn#19473
import numpy as np
from sklearn.neighbors import KDTree, BallTree
from .common import Benchmark
class BinaryTreeBenchmark(Benchmark):
"""
Base class for BinaryTree benchmarks.
"""
param_names = ['BinaryTree', 'n', 'matrix_type', 'leaf_size']
params = [
[KDTree, BallTree],
[1000, 10000],
['random', 'ordered', 'reverse_ordered', 'duplicated'],
[1 , 20, 40, 100],
]
test_cases = {
"random": lambda n: np.random.rand(n),
"ordered": lambda n: np.arange(n, dtype=float),
"reverse_ordered": lambda n: np.arange(n, 0, -1, dtype=float),
"duplicated": lambda n: np.zeros([n], dtype=float)
}
class BinaryTreeCreation(BinaryTreeBenchmark):
"""
Benchmarks for BinaryTree creation.
"""
def setup(self, BinaryTree, n, matrix_type, leaf_size):
self.data = np.expand_dims(self.test_cases[matrix_type](n), -1)
self.leaf_size = leaf_size
self.BinaryTree = BinaryTree
def time_creation(self, BinaryTree, n, matrix_type, leaf_size):
self.BinaryTree(self.data, leaf_size=self.leaf_size)
class BinaryTreeQuery(BinaryTreeBenchmark):
"""
Benchmarks for BinaryTree queries.
"""
param_names = BinaryTreeBenchmark.param_names + ['breadth_first']
params = BinaryTreeBenchmark.params + [[True, False]]
def setup(self, BinaryTree, n, matrix_type, leaf_size, breadth_first):
self.data = np.expand_dims(self.test_cases[matrix_type](n), -1)
self.tree = BinaryTree(self.data, leaf_size=leaf_size)
self.query_points = np.expand_dims(np.random.rand(10000), -1)
def time_query(self, BinaryTree, n, matrix_type, leaf_size, breadth_first):
self.tree.query(
self.query_points,
dualtree=False,
breadth_first=breadth_first,
sort_results=False)
@jjerphan
Copy link
Author

jjerphan commented Apr 9, 2021

Raw results of removing stats.

See scikit-learn/scikit-learn#13331 for reference.

· Creating environments
· Discovering benchmarks
·· Uninstalling from conda-py3.9-cython-joblib-numpy-scipy-threadpoolctl
·· Installing 7dd7c947 <remove_stats> into conda-py3.9-cython-joblib-numpy-scipy-threadpoolctl.
· Running 4 total benchmarks (2 commits * 1 environments * 2 benchmarks)
[  0.00%] · For scikit-learn commit 7dd7c947 <remove_stats>:
[  0.00%] ·· Benchmarking conda-py3.9-cython-joblib-numpy-scipy-threadpoolctl
[ 25.00%] ··· binary_tree.BinaryTreeCreation.time_creation                                                                                                                                                                                                               ok
[ 25.00%] ··· ======================================= ======= ================= ============= ============= ============= =============
              --                                                                                       leaf_size                       
              ----------------------------------------------------------------- -------------------------------------------------------
                             BinaryTree                  n       matrix_type          1             20            40           100     
              ======================================= ======= ================= ============= ============= ============= =============
                 sklearn.neighbors._kd_tree.KDTree       10         random         100±10μs      183±30μs      201±40μs      200±20μs  
                 sklearn.neighbors._kd_tree.KDTree       10        ordered        292±200μs      135±20μs      115±5μs       116±6μs   
                 sklearn.neighbors._kd_tree.KDTree       10    reverse_ordered     120±5μs       113±4μs       114±6μs       114±7μs   
                 sklearn.neighbors._kd_tree.KDTree       10       duplicated       116±7μs       114±20μs      117±5μs       108±3μs   
                 sklearn.neighbors._kd_tree.KDTree      100         random         148±6μs       116±3μs       114±4μs       119±4μs   
                 sklearn.neighbors._kd_tree.KDTree      100        ordered         154±40μs      124±9μs       124±7μs       121±3μs   
                 sklearn.neighbors._kd_tree.KDTree      100    reverse_ordered     140±9μs       111±6μs       104±9μs       85.1±1μs  
                 sklearn.neighbors._kd_tree.KDTree      100       duplicated       108±6μs       95.0±3μs      91.2±2μs      89.6±2μs  
                 sklearn.neighbors._kd_tree.KDTree      1000        random         462±10μs      252±7μs       218±7μs       186±5μs   
                 sklearn.neighbors._kd_tree.KDTree      1000       ordered         342±10μs      182±3μs       172±6μs       142±3μs   
                 sklearn.neighbors._kd_tree.KDTree      1000   reverse_ordered     358±9μs       186±4μs       173±5μs       150±4μs   
                 sklearn.neighbors._kd_tree.KDTree      1000      duplicated       277±5μs       170±5μs       163±5μs       144±4μs   
                 sklearn.neighbors._kd_tree.KDTree     10000        random       4.38±0.08ms   2.07±0.05ms   1.94±0.06ms   1.67±0.04ms 
                 sklearn.neighbors._kd_tree.KDTree     10000       ordered        3.52±0.1ms   1.57±0.04ms   1.34±0.03ms   1.08±0.02ms 
                 sklearn.neighbors._kd_tree.KDTree     10000   reverse_ordered    3.42±0.1ms   1.57±0.04ms   1.27±0.02ms   1.09±0.02ms 
                 sklearn.neighbors._kd_tree.KDTree     10000      duplicated     2.68±0.05ms   1.33±0.03ms   1.17±0.02ms   1.02±0.02ms 
               sklearn.neighbors._ball_tree.BallTree     10         random         86.7±4μs      84.2±2μs      79.2±2μs      79.9±2μs  
               sklearn.neighbors._ball_tree.BallTree     10        ordered         81.6±2μs      79.3±2μs      80.2±2μs      80.4±2μs  
               sklearn.neighbors._ball_tree.BallTree     10    reverse_ordered     82.8±2μs      80.8±2μs      80.8±1μs      85.7±2μs  
               sklearn.neighbors._ball_tree.BallTree     10       duplicated       87.8±2μs      85.0±2μs      83.7±2μs      84.3±2μs  
               sklearn.neighbors._ball_tree.BallTree    100         random         107±3μs       91.9±2μs      87.7±2μs      80.1±2μs  
               sklearn.neighbors._ball_tree.BallTree    100        ordered         99.0±2μs      86.4±2μs      88.7±3μs      85.8±3μs  
               sklearn.neighbors._ball_tree.BallTree    100    reverse_ordered     105±2μs       90.7±2μs      83.3±2μs      80.4±2μs  
               sklearn.neighbors._ball_tree.BallTree    100       duplicated       96.7±2μs      85.1±2μs      83.7±2μs      81.6±2μs  
               sklearn.neighbors._ball_tree.BallTree    1000        random         413±9μs       251±8μs       205±4μs       177±4μs   
               sklearn.neighbors._ball_tree.BallTree    1000       ordered         299±7μs       199±6μs       174±5μs       154±4μs   
               sklearn.neighbors._ball_tree.BallTree    1000   reverse_ordered     331±9μs       188±3μs       175±4μs       153±3μs   
               sklearn.neighbors._ball_tree.BallTree    1000      duplicated       289±8μs       185±5μs       157±2μs       141±3μs   
               sklearn.neighbors._ball_tree.BallTree   10000        random       4.26±0.09ms   2.29±0.05ms   2.03±0.06ms   1.76±0.04ms 
               sklearn.neighbors._ball_tree.BallTree   10000       ordered        3.10±0.1ms   1.65±0.05ms   1.45±0.03ms   1.25±0.03ms 
               sklearn.neighbors._ball_tree.BallTree   10000   reverse_ordered   3.21±0.07ms   1.67±0.04ms   1.36±0.02ms   1.17±0.02ms 
               sklearn.neighbors._ball_tree.BallTree   10000      duplicated     2.64±0.06ms   1.44±0.03ms   1.26±0.02ms   1.11±0.02ms 
              ======================================= ======= ================= ============= ============= ============= =============

[ 50.00%] ··· binary_tree.BinaryTreeQuery.time_query                                                                                                                                                                                                                     ok
[ 50.00%] ··· ======================================= ======= ================= ============= ============ ============ ============
              --                                                                                     leaf_size                      
              ----------------------------------------------------------------- ----------------------------------------------------
                             BinaryTree                  n       matrix_type          1            20           40          100     
              ======================================= ======= ================= ============= ============ ============ ============
                 sklearn.neighbors._kd_tree.KDTree       10         random         88.6±2μs     73.8±3μs     73.7±3μs     78.0±3μs  
                 sklearn.neighbors._kd_tree.KDTree       10        ordered         91.7±3μs     72.3±2μs     72.2±2μs     73.7±4μs  
                 sklearn.neighbors._kd_tree.KDTree       10    reverse_ordered     96.3±3μs     78.9±2μs     78.5±2μs     78.6±2μs  
                 sklearn.neighbors._kd_tree.KDTree       10       duplicated       138±3μs      73.7±2μs     68.8±2μs     69.9±2μs  
                 sklearn.neighbors._kd_tree.KDTree      100         random         113±3μs      102±6μs      111±6μs      128±4μs   
                 sklearn.neighbors._kd_tree.KDTree      100        ordered         108±2μs      90.0±2μs     94.5±2μs     109±3μs   
                 sklearn.neighbors._kd_tree.KDTree      100    reverse_ordered     121±3μs      96.9±1μs     98.2±2μs     170±5μs   
                 sklearn.neighbors._kd_tree.KDTree      100       duplicated       682±10μs     142±2μs      122±3μs      111±2μs   
                 sklearn.neighbors._kd_tree.KDTree      1000        random         141±3μs      134±5μs      140±5μs      182±10μs  
                 sklearn.neighbors._kd_tree.KDTree      1000       ordered         147±8μs      143±10μs     122±2μs      141±3μs   
                 sklearn.neighbors._kd_tree.KDTree      1000   reverse_ordered     150±2μs      136±4μs      140±3μs      158±4μs   
                 sklearn.neighbors._kd_tree.KDTree      1000      duplicated      5.49±0.2ms    873±30μs     634±10μs     552±10μs  
                 sklearn.neighbors._kd_tree.KDTree     10000        random         180±2μs      183±5μs      181±6μs      215±8μs   
                 sklearn.neighbors._kd_tree.KDTree     10000       ordered         169±4μs      145±4μs      156±4μs      184±6μs   
                 sklearn.neighbors._kd_tree.KDTree     10000   reverse_ordered     186±3μs      158±5μs      165±3μs      189±3μs   
                 sklearn.neighbors._kd_tree.KDTree     10000      duplicated      76.6±0.7ms   7.23±0.2ms   5.86±0.1ms   5.22±0.1ms 
               sklearn.neighbors._ball_tree.BallTree     10         random         78.8±2μs     75.2±2μs     74.7±2μs     75.2±3μs  
               sklearn.neighbors._ball_tree.BallTree     10        ordered         76.4±2μs     69.8±1μs     70.2±2μs     67.2±1μs  
               sklearn.neighbors._ball_tree.BallTree     10    reverse_ordered     72.7±1μs     71.3±1μs     71.7±1μs     72.3±1μs  
               sklearn.neighbors._ball_tree.BallTree     10       duplicated      91.8±0.9μs    67.2±1μs     66.9±1μs     67.5±1μs  
               sklearn.neighbors._ball_tree.BallTree    100         random         95.0±1μs     97.8±4μs     104±6μs      128±3μs   
               sklearn.neighbors._ball_tree.BallTree    100        ordered         92.2±3μs     86.4±2μs     94.3±3μs     108±2μs   
               sklearn.neighbors._ball_tree.BallTree    100    reverse_ordered     86.1±2μs     81.9±1μs     89.3±1μs     165±3μs   
               sklearn.neighbors._ball_tree.BallTree    100       duplicated       354±9μs      121±2μs      112±2μs      107±2μs   
               sklearn.neighbors._ball_tree.BallTree    1000        random         110±2μs      121±4μs      129±4μs      153±7μs   
               sklearn.neighbors._ball_tree.BallTree    1000       ordered         96.7±2μs     98.1±2μs     107±1μs      137±3μs   
               sklearn.neighbors._ball_tree.BallTree    1000   reverse_ordered     101±2μs      102±3μs      107±2μs      128±2μs   
               sklearn.neighbors._ball_tree.BallTree    1000      duplicated     2.41±0.06ms    648±10μs     559±10μs     527±10μs  
               sklearn.neighbors._ball_tree.BallTree   10000        random         133±2μs      145±4μs      161±6μs      208±7μs   
               sklearn.neighbors._ball_tree.BallTree   10000       ordered         118±3μs      117±2μs      131±4μs      162±3μs   
               sklearn.neighbors._ball_tree.BallTree   10000   reverse_ordered     115±3μs      110±2μs      123±2μs      152±3μs   
               sklearn.neighbors._ball_tree.BallTree   10000      duplicated      34.1±0.5ms   5.39±0.1ms   4.86±0.1ms   4.82±0.1ms 
              ======================================= ======= ================= ============= ============ ============ ============

[ 50.00%] · For scikit-learn commit 80e985b5 <main>:
[ 50.00%] ·· Building for conda-py3.9-cython-joblib-numpy-scipy-threadpoolctl..
[ 50.00%] ·· Benchmarking conda-py3.9-cython-joblib-numpy-scipy-threadpoolctl
[ 75.00%] ··· binary_tree.BinaryTreeCreation.time_creation                                                                                                                                                                                                               ok
[ 75.00%] ··· ======================================= ======= ================= ============= ============= ============= =============
              --                                                                                       leaf_size                       
              ----------------------------------------------------------------- -------------------------------------------------------
                             BinaryTree                  n       matrix_type          1             20            40           100     
              ======================================= ======= ================= ============= ============= ============= =============
                 sklearn.neighbors._kd_tree.KDTree       10         random         87.6±2μs      86.2±2μs      81.9±2μs      82.2±2μs  
                 sklearn.neighbors._kd_tree.KDTree       10        ordered         85.3±2μs      82.7±2μs      86.9±4μs      87.6±2μs  
                 sklearn.neighbors._kd_tree.KDTree       10    reverse_ordered     88.9±3μs      83.8±2μs      83.1±2μs      84.5±2μs  
                 sklearn.neighbors._kd_tree.KDTree       10       duplicated       85.9±2μs      84.5±2μs      84.5±1μs      83.9±1μs  
                 sklearn.neighbors._kd_tree.KDTree      100         random         113±3μs       91.9±2μs      88.7±2μs      85.1±2μs  
                 sklearn.neighbors._kd_tree.KDTree      100        ordered         109±2μs       91.5±2μs      88.3±2μs      85.1±2μs  
                 sklearn.neighbors._kd_tree.KDTree      100    reverse_ordered     111±2μs       91.6±2μs      90.3±2μs      86.9±1μs  
                 sklearn.neighbors._kd_tree.KDTree      100       duplicated       105±1μs       93.2±1μs     90.7±0.8μs    87.3±0.5μs 
                 sklearn.neighbors._kd_tree.KDTree      1000        random         438±10μs      241±4μs       211±5μs       181±3μs   
                 sklearn.neighbors._kd_tree.KDTree      1000       ordered         350±8μs       187±3μs       166±2μs       146±3μs   
                 sklearn.neighbors._kd_tree.KDTree      1000   reverse_ordered     385±10μs      200±7μs       175±5μs       151±3μs   
                 sklearn.neighbors._kd_tree.KDTree      1000      duplicated       301±8μs       175±2μs       165±4μs       147±4μs   
                 sklearn.neighbors._kd_tree.KDTree     10000        random        4.65±0.1ms   2.21±0.05ms   1.82±0.03ms   1.58±0.02ms 
                 sklearn.neighbors._kd_tree.KDTree     10000       ordered       3.31±0.07ms   1.47±0.03ms   1.27±0.02ms   1.10±0.02ms 
                 sklearn.neighbors._kd_tree.KDTree     10000   reverse_ordered   3.46±0.06ms   1.59±0.04ms   1.38±0.03ms   1.18±0.03ms 
                 sklearn.neighbors._kd_tree.KDTree     10000      duplicated     2.89±0.06ms   1.42±0.04ms   1.25±0.03ms   1.08±0.02ms 
               sklearn.neighbors._ball_tree.BallTree     10         random         84.3±2μs      81.5±1μs      81.8±1μs      79.5±2μs  
               sklearn.neighbors._ball_tree.BallTree     10        ordered         81.1±2μs      79.7±2μs      80.4±2μs      80.5±2μs  
               sklearn.neighbors._ball_tree.BallTree     10    reverse_ordered     83.1±2μs      80.3±2μs      80.6±1μs      84.2±2μs  
               sklearn.neighbors._ball_tree.BallTree     10       duplicated       86.6±2μs      83.2±2μs      83.6±2μs      83.0±2μs  
               sklearn.neighbors._ball_tree.BallTree    100         random         106±2μs       90.3±2μs      87.2±2μs      84.2±2μs  
               sklearn.neighbors._ball_tree.BallTree    100        ordered         103±2μs       89.5±2μs      86.9±2μs      83.6±2μs  
               sklearn.neighbors._ball_tree.BallTree    100    reverse_ordered     99.2±2μs      89.8±2μs      86.4±2μs      79.1±2μs  
               sklearn.neighbors._ball_tree.BallTree    100       duplicated       97.0±2μs      84.9±2μs      84.1±2μs      82.0±2μs  
               sklearn.neighbors._ball_tree.BallTree    1000        random         381±4μs       238±4μs       211±3μs       182±3μs   
               sklearn.neighbors._ball_tree.BallTree    1000       ordered         302±6μs       188±3μs       166±2μs       147±2μs   
               sklearn.neighbors._ball_tree.BallTree    1000   reverse_ordered     313±4μs       193±3μs       169±2μs       147±3μs   
               sklearn.neighbors._ball_tree.BallTree    1000      duplicated       276±5μs       180±2μs       161±2μs       144±2μs   
               sklearn.neighbors._ball_tree.BallTree   10000        random       3.93±0.09ms   2.18±0.03ms   1.91±0.03ms   1.68±0.03ms 
               sklearn.neighbors._ball_tree.BallTree   10000       ordered       2.91±0.06ms   1.57±0.03ms   1.36±0.02ms   1.19±0.02ms 
               sklearn.neighbors._ball_tree.BallTree   10000   reverse_ordered   3.02±0.05ms   1.58±0.02ms   1.48±0.03ms   1.27±0.02ms 
               sklearn.neighbors._ball_tree.BallTree   10000      duplicated     2.82±0.05ms   1.55±0.03ms   1.27±0.03ms   1.18±0.03ms 
              ======================================= ======= ================= ============= ============= ============= =============

[100.00%] ··· binary_tree.BinaryTreeQuery.time_query                                                                                                                                                                                                                     ok
[100.00%] ··· ======================================= ======= ================= ============= ============ ============ ============
              --                                                                                     leaf_size                      
              ----------------------------------------------------------------- ----------------------------------------------------
                             BinaryTree                  n       matrix_type          1            20           40          100     
              ======================================= ======= ================= ============= ============ ============ ============
                 sklearn.neighbors._kd_tree.KDTree       10         random         90.0±2μs     71.6±2μs     72.2±1μs     72.0±1μs  
                 sklearn.neighbors._kd_tree.KDTree       10        ordered         88.1±2μs     69.4±2μs     70.2±1μs     69.9±1μs  
                 sklearn.neighbors._kd_tree.KDTree       10    reverse_ordered     94.2±2μs     77.1±1μs     77.3±1μs     81.5±2μs  
                 sklearn.neighbors._kd_tree.KDTree       10       duplicated       135±3μs      71.8±1μs     71.5±1μs     71.7±1μs  
                 sklearn.neighbors._kd_tree.KDTree      100         random         115±2μs      99.1±1μs     106±2μs      121±3μs   
                 sklearn.neighbors._kd_tree.KDTree      100        ordered         116±3μs      97.0±3μs     101±3μs      114±4μs   
                 sklearn.neighbors._kd_tree.KDTree      100    reverse_ordered     126±3μs      102±3μs      104±2μs      175±3μs   
                 sklearn.neighbors._kd_tree.KDTree      100       duplicated       673±10μs     150±4μs      128±4μs      118±4μs   
                 sklearn.neighbors._kd_tree.KDTree      1000        random         143±3μs      134±3μs      144±4μs      170±4μs   
                 sklearn.neighbors._kd_tree.KDTree      1000       ordered         135±3μs      120±3μs      127±3μs      150±4μs   
                 sklearn.neighbors._kd_tree.KDTree      1000   reverse_ordered     152±4μs      134±3μs      138±3μs      155±4μs   
                 sklearn.neighbors._kd_tree.KDTree      1000      duplicated      5.34±0.1ms    808±20μs     641±10μs     566±8μs   
                 sklearn.neighbors._kd_tree.KDTree     10000        random         188±4μs      164±6μs      186±5μs      222±5μs   
                 sklearn.neighbors._kd_tree.KDTree     10000       ordered         166±4μs      144±3μs      155±4μs      183±3μs   
                 sklearn.neighbors._kd_tree.KDTree     10000   reverse_ordered     197±4μs      163±4μs      172±4μs      198±5μs   
                 sklearn.neighbors._kd_tree.KDTree     10000      duplicated       80.9±1ms    7.21±0.1ms   5.88±0.1ms   4.92±0.1ms 
               sklearn.neighbors._ball_tree.BallTree     10         random         74.8±2μs     67.6±1μs     69.1±1μs     69.9±1μs  
               sklearn.neighbors._ball_tree.BallTree     10        ordered         74.8±2μs     67.8±2μs     68.1±1μs     68.0±1μs  
               sklearn.neighbors._ball_tree.BallTree     10    reverse_ordered     74.0±1μs     72.5±2μs     72.8±1μs     73.2±1μs  
               sklearn.neighbors._ball_tree.BallTree     10       duplicated       93.5±2μs     67.1±1μs     67.4±1μs     67.6±1μs  
               sklearn.neighbors._ball_tree.BallTree    100         random         95.6±2μs     91.9±2μs     103±2μs      122±2μs   
               sklearn.neighbors._ball_tree.BallTree    100        ordered         87.7±1μs     84.7±2μs     98.8±3μs     120±3μs   
               sklearn.neighbors._ball_tree.BallTree    100    reverse_ordered     89.5±2μs     86.5±2μs     96.3±2μs     169±3μs   
               sklearn.neighbors._ball_tree.BallTree    100       duplicated       354±8μs      126±2μs      117±2μs      116±4μs   
               sklearn.neighbors._ball_tree.BallTree    1000        random         116±3μs      120±3μs      134±3μs      163±3μs   
               sklearn.neighbors._ball_tree.BallTree    1000       ordered         101±2μs      103±3μs      113±2μs      142±4μs   
               sklearn.neighbors._ball_tree.BallTree    1000   reverse_ordered     99.2±2μs     102±2μs      114±3μs      142±3μs   
               sklearn.neighbors._ball_tree.BallTree    1000      duplicated     2.47±0.05ms    688±10μs     619±10μs     578±10μs  
               sklearn.neighbors._ball_tree.BallTree   10000        random         135±2μs      139±3μs      158±3μs      208±5μs   
               sklearn.neighbors._ball_tree.BallTree   10000       ordered         119±3μs      119±3μs      135±3μs      164±2μs   
               sklearn.neighbors._ball_tree.BallTree   10000   reverse_ordered     117±3μs      119±3μs      129±2μs      164±3μs   
               sklearn.neighbors._ball_tree.BallTree   10000      duplicated      35.0±0.6ms   6.34±0.1ms   5.74±0.1ms   5.45±0.1ms 
              ======================================= ======= ================= ============= ============ ============ ============

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment