# Python rank benchmarking
"""
Benchmarking the array-ranking methods at
http://stackoverflow.com/questions/5284646/rank-items-in-an-array-using-python-numpy
"""
import timeit

import numpy as np
import pandas as pd
from scipy.stats import rankdata
def rank_arange(array):
    """Return the 0-based rank of every element of ``array``.

    The array is argsorted once; the integers ``0 .. len(array) - 1`` are
    then scattered into the positions given by that sort order, so the
    element that sorts first receives rank 0, the next rank 1, and so on.
    Every element gets a distinct rank, including equal values.

    Parameters
    ----------
    array : object with an ``argsort()`` method and a length
        The benchmark passes 1-D ``numpy.ndarray`` values.

    Returns
    -------
    numpy.ndarray
        Integer array of length ``len(array)`` holding the ranks.
    """
    order = array.argsort()
    ranks = np.empty(len(array), int)
    # order[i] is the index of the i-th smallest element, so that element's
    # rank is i.
    ranks[order] = np.arange(len(array))
    return ranks
def rank_argsort(array):
    """Return the 0-based rank of every element of ``array``.

    Applies ``argsort`` twice: the first call yields the sort order, and
    argsorting that order yields, for each original position, where it
    lands in the sorted sequence (its rank).

    Parameters
    ----------
    array : object with an ``argsort()`` method
        The benchmark passes 1-D ``numpy.ndarray`` values.

    Returns
    -------
    numpy.ndarray
        Integer array holding the ranks.
    """
    return array.argsort().argsort()
# Array sizes to benchmark and the names of the ranking functions to time.
ns = [5, 10, 50, 100, 500, 1000, 5000, 10000]
funcs = ['rank_arange', 'rank_argsort', 'rankdata']

# One row per array size; one column for n plus one timing column per
# function, pre-filled with NaN until the measurement is written.
results = pd.DataFrame(np.full((len(ns), 1 + len(funcs)), np.nan))
results.columns = ['n'] + funcs
results['n'] = ns

for n in ns:
    # Must stay a module-level name called ``array``: the timeit setup
    # string below imports it from ``__main__``.
    array = np.random.rand(n)
    for f in funcs:
        # Total time in seconds for 1000 calls of f(array).
        r = timeit.timeit('%s(array)' % f,
                          setup="from __main__ import array,%s" % f,
                          number=1000)
        # ``.loc`` replaces ``.ix``, which was removed in pandas 1.0.
        results.loc[results.n == n, f] = r

# A bare ``results`` expression only displays in an interactive session;
# print so the table is also shown when run as a script.
print(results)

"""Results:
       n  rank_arange  rank_argsort  rankdata
0      5     0.008286      0.002283  0.020194
1     10     0.004828      0.001600  0.019356
2     50     0.007560      0.002222  0.019348
3    100     0.004734      0.003443  0.016481
4    500     0.012260      0.022422  0.033053
5   1000     0.035825      0.097287  0.062478
6   5000     0.443329      0.754238  0.492829
7  10000     0.951714      1.658155  1.028654
"""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment