Created
September 22, 2014 23:49
-
-
Save skuschel/1599698bd271be63284f to your computer and use it in GitHub Desktop.
Benchmark between NumPy's digitize and searchsorted functions. IPython notebook.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:47fd176dfa0c93311b89f99a57b966e98062a2e958745ba99fd68a4f7f94ed68" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy as np\n", | |
"import timeit\n", | |
"data = np.random.random(500e3)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Digitize performs better at low bin counts. The most extreme possible example: just 2 bins." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"bins = np.linspace(0,1,2)\n", | |
"%timeit np.digitize(data, bins)\n", | |
"%timeit np.searchsorted(bins, data, side='right')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"100 loops, best of 3: 2.43 ms per loop\n", | |
"100 loops, best of 3: 6.01 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 51 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"With 72 bins, both algorithms have about equal runtime. This break-even point is independent of the number of samples." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"bins = np.linspace(0,1,72)\n", | |
"%timeit np.digitize(data, bins)\n", | |
"%timeit np.searchsorted(bins, data, side='right')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"10 loops, best of 3: 29.9 ms per loop\n", | |
"10 loops, best of 3: 30.5 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 52 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"At higher bin counts, searchsorted is much faster." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print 200\n", | |
"bins = np.linspace(0,1,200)\n", | |
"%timeit np.digitize(data, bins)\n", | |
"%timeit np.searchsorted(bins, data, side='right')\n", | |
"print 400\n", | |
"bins = np.linspace(0,1,400)\n", | |
"%timeit np.digitize(data, bins)\n", | |
"%timeit np.searchsorted(bins, data, side='right')\n", | |
"print 1000\n", | |
"bins = np.linspace(0,1,1000)\n", | |
"%timeit np.digitize(data, bins)\n", | |
"%timeit np.searchsorted(bins, data, side='right')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"200\n", | |
"10 loops, best of 3: 71.3 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"10 loops, best of 3: 38.7 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"400\n", | |
"10 loops, best of 3: 136 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"10 loops, best of 3: 43.7 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"1000\n", | |
"1 loops, best of 3: 331 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"10 loops, best of 3: 49.7 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 54 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment