Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Python rank benchmarking
"""
Benchmark the array-ranking approaches discussed at
http://stackoverflow.com/questions/5284646/rank-items-in-an-array-using-python-numpy
"""
import timeit
from scipy.stats import rankdata
import pandas as pd
import numpy as np
def rank_arange(array):
    """Return the 0-based rank of every element of a 1-D NumPy array.

    The sorting permutation produced by ``argsort`` is inverted by
    scattering ``0..n-1`` into the positions it names, so ``ranks[i]`` is
    the position ``array[i]`` would occupy in the sorted array. Equal
    values receive distinct ranks, in whatever order ``argsort`` places
    them.

    Args:
        array: One-dimensional ``numpy.ndarray`` of sortable values.

    Returns:
        Integer ``numpy.ndarray`` with one rank per input element.
    """
    order = array.argsort()
    ranks = np.empty(len(array), int)
    # order[k] is the index of the k-th smallest element, so that element
    # is assigned rank k.
    ranks[order] = np.arange(len(array))
    return ranks
def rank_argsort(array):
    """Return the 0-based rank of every element using a double argsort.

    The first ``argsort`` yields the permutation that sorts ``array``;
    applying ``argsort`` to that permutation inverts it, giving each
    element's position in the sorted order.

    Args:
        array: ``numpy.ndarray`` of sortable values.

    Returns:
        Integer ``numpy.ndarray`` with one rank per input element.
    """
    ranks = array.argsort().argsort()
    return ranks
# Array lengths to benchmark and the names of the ranking functions to time.
# The functions are referenced by name because timeit receives the statement
# to run as a string.
ns = [5, 10, 50, 100, 500, 1000, 5000, 10000]
funcs = ['rank_arange', 'rank_argsort', 'rankdata']

# One row per array length; timing columns start as NaN until measured.
results = pd.DataFrame(np.full((len(ns), 1 + len(funcs)), np.nan),
                       columns=['n'] + funcs)
results['n'] = ns

for n in ns:
    # `array` must stay a module-level name: the timeit setup below imports
    # it (and the function under test) from __main__.
    array = np.random.rand(n)
    for f in funcs:
        # Total wall-clock seconds for 1000 calls on the same random array.
        r = timeit.timeit('%s(array)' % f,
                          setup="from __main__ import array,%s" % f,
                          number=1000)
        # `.loc` replaces the `.ix` indexer, which pandas 1.0 removed.
        results.loc[results.n == n, f] = r

# A bare `results` expression shows nothing when run as a script.
print(results)
"""Results:
Total seconds for 1000 calls of each function on a random array of length n.

       n  rank_arange  rank_argsort  rankdata
0      5     0.008286      0.002283  0.020194
1     10     0.004828      0.001600  0.019356
2     50     0.007560      0.002222  0.019348
3    100     0.004734      0.003443  0.016481
4    500     0.012260      0.022422  0.033053
5   1000     0.035825      0.097287  0.062478
6   5000     0.443329      0.754238  0.492829
7  10000     0.951714      1.658155  1.028654
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment