Skip to content

Instantly share code, notes, and snippets.

@jaimefrio
Created January 14, 2014 19:33
Show Gist options
  • Save jaimefrio/8424247 to your computer and use it in GitHub Desktop.
Save jaimefrio/8424247 to your computer and use it in GitHub Desktop.
Timings for several variants of `np.digitize` and `np.searchsorted`
from __future__ import division
import numpy as np
import timeit
import matplotlib.pyplot as plt
# Benchmark grid: sizes of the sorted "haystack" array that is searched, and
# sizes of the "needle" array holding the values to look up in it.
# np.linspace returns floats (2.0, 15.05..., ...); truncate them to whole
# element counts because each one is later passed as `num=` to np.linspace,
# which must be an integer (recent NumPy raises TypeError for a float).
haystack_sizes = np.linspace(2, 250, num=20, endpoint=True).astype(int)
needle_sizes = np.array([10, 20, 50, 100, 200, 500, 1000, 10000, 20000])

# Statements handed to timeit, one per variant being compared.
# NOTE(review): `np.fastsearchsorted` and `np.bindigitize` do not appear to be
# part of released NumPy; presumably this script targets an experimental
# build that provides them -- confirm before running.
codes = [
    'np.searchsorted(haystack, needle)',
    'np.fastsearchsorted(haystack, needle)',
    'np.digitize(needle, haystack)',
    'np.bindigitize(needle, haystack)',
]

# timeit setup: the timed statements read the current `needle` and `haystack`
# module globals, so the script has to be run as __main__.
setup_code = 'from __main__ import np, needle, haystack'

# timings[d, j, k, l] holds the best time for needle variant d (three of
# them), haystack size j, needle size k and statement l.
timings = np.empty((3, len(haystack_sizes), len(needle_sizes), len(codes)))
# Fill `timings` by timing every statement in `codes` for each combination of
# haystack size, needle size and needle distribution.
for j, haystack_size in enumerate(haystack_sizes):
    # `num` has to be an integer; haystack_sizes may hold floats.
    haystack = np.linspace(0, 1, num=int(haystack_size), endpoint=True)
    for k, needle_size in enumerate(needle_sizes):
        data = np.random.rand(needle_size)
        data = data - data.mean()
        # Three needle distributions built from the same zero-mean sample:
        # centred on 0.5 at full width, and at half width centred on 0.25
        # and on 0.75.
        needles = (data + 0.5, data*0.5 + 0.25, data*0.5 + 0.75)
        # `needle` and `haystack` must stay module-level names: setup_code
        # imports them from __main__ for every timing run.
        for d, needle in enumerate(needles):
            for l, code in enumerate(codes):
                # Pilot run: total time of 10 executions, used to size the
                # real measurement.
                pilot = np.min(timeit.repeat(code, setup_code,
                                             repeat=1, number=10))
                # Aim for roughly 0.1 s of measurement, but always repeat at
                # least once: int(.1 / pilot) is 0 when the pilot run takes
                # longer than 0.1 s, and np.min of an empty result list
                # raises ValueError. A zero pilot time (timer resolution)
                # would otherwise divide by zero.
                repeats = max(1, int(.1 / pilot)) if pilot > 0 else 1
                timings[d, j, k, l] = np.min(timeit.repeat(code, setup_code,
                                                           repeat=repeats,
                                                           number=10))
                # Formatted string instead of the Python 2 print statement,
                # so the line is valid (and prints the same) on 2 and 3.
                print('{} {} {} {}'.format(j, k, code, timings[d, j, k, l]))
# One subplot per needle size, laid out on a grid three columns wide.
cols = 3
# Ceiling division: enough rows to hold every needle size.
rows = -(-len(needle_sizes) // cols)

# Line style per timed statement, in the order of the last axis of `timings`:
# red, green, blue, black.
line_styles = ('r-', 'g-', 'b-', 'k-')

for position, size in enumerate(needle_sizes, 1):
    plt.subplot(rows, cols, position)
    plt.title('needle = {}'.format(size))
    # Draw the curves of all three needle distributions on the same axes;
    # they share a colour per statement.
    for variant in range(3):
        for code_index, style in enumerate(line_styles):
            series = timings[variant, :, position - 1, code_index]
            plt.plot(haystack_sizes, series, style)

plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment