Created
September 12, 2011 02:21
-
-
Save marcelcaraciolo/1210462 to your computer and use it in GitHub Desktop.
spearman coefficient
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
import sys | |
import random | |
def _rank_dists(ranks1, ranks2): | |
"""Finds the difference between the values in ranks1 and ranks2 for keys | |
present in both dicts. If the arguments are not dicts, they are converted | |
from (key, rank) sequences. | |
""" | |
ranks1 = dict(ranks1) | |
ranks2 = dict(ranks2) | |
for k, v1 in ranks1.iteritems(): | |
try: | |
yield k, v1 - ranks2[k] | |
except KeyError: | |
pass | |
def spearman_correlation(ranks1, ranks2):
    """Return the Spearman rank correlation coefficient of two scorings.

    ``ranks1`` and ``ranks2`` are dicts mapping key -> score, or sequences of
    ``(key, score)`` pairs. Each input is converted to a ranking by sorting
    on descending score and numbering the positions from 0; items with equal
    scores keep their input order and therefore receive distinct,
    consecutive ranks.

    The coefficient is ``1 - 6 * sum(d^2) / (n * (n^2 - 1))``, where ``d`` is
    the rank difference of a key found in both inputs and ``n`` is the number
    of such shared keys. When both inputs hold exactly the same keys the
    result lies between -1.0 (opposite order) and 1.0 (identical order).
    Ranks are assigned before unshared keys are dropped, so for meaningful
    results remove keys present in only one input beforehand.

    Returns 0.0 when fewer than two keys are shared, because the coefficient
    is undefined in that case.
    """
    def _to_ranks(scores):
        # Iterating a dict yields only its keys, so take the (key, score)
        # pairs explicitly before sorting on the score.
        if isinstance(scores, dict):
            scores = scores.items()
        # A reversed sort stays stable, so ties keep their input order.
        ordered = sorted(scores, key=lambda pair: pair[1], reverse=True)
        return [(pair[0], position) for position, pair in enumerate(ordered)]

    n = 0
    res = 0
    for _key, dist in _rank_dists(_to_ranks(ranks1), _to_ranks(ranks2)):
        res += dist * dist
        n += 1

    # n * (n * n - 1) is zero for n == 0 and n == 1: nothing to correlate.
    if n < 2:
        return 0.0
    return 1 - (6 * float(res) / (n * (n * n - 1)))
if __name__ == '__main__':
    # Benchmark driver: build two random scorings over the items 1..N, then
    # print their Spearman correlation and the time the computation took.
    if len(sys.argv) < 2:
        sys.exit("usage: python %s N" % sys.argv[0])
    n = int(sys.argv[1])

    # range() is not a list on Python 3, so materialise it before shuffling.
    possible_items_x = list(range(1, n + 1))
    possible_items_y = list(range(1, n + 1))
    random.shuffle(possible_items_y)
    random.shuffle(possible_items_x)

    ranks1 = []
    ranks2 = []
    for _ in range(n):
        item1 = possible_items_x.pop()
        item2 = possible_items_y.pop()
        # Each item gets a random score in the interval [1.0, 5.0).
        ranks1.append((item1, random.random() * 4.0 + 1.0))
        ranks2.append((item2, random.random() * 4.0 + 1.0))

    start_time = datetime.datetime.now()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(spearman_correlation(ranks1, ranks2))
    end_time = datetime.datetime.now()
    secs = end_time - start_time
    print("Main took %s" % secs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment