Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created September 12, 2011 02:21
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save marcelcaraciolo/1210462 to your computer and use it in GitHub Desktop.
Save marcelcaraciolo/1210462 to your computer and use it in GitHub Desktop.
Spearman rank correlation coefficient
import datetime
import sys
import random
def _rank_dists(ranks1, ranks2):
    """Yield ``(key, rank1 - rank2)`` for every key present in both rankings.

    Each argument may be a dict mapping key to rank, or any iterable of
    ``(key, rank)`` pairs; both are passed through ``dict()`` before use.
    Keys that appear in only one of the two rankings are skipped silently.
    This is a generator, so nothing is computed until it is iterated.
    """
    ranks1 = dict(ranks1)
    ranks2 = dict(ranks2)
    # items() instead of the Python 2-only iteritems(), so the helper runs
    # unchanged on both Python 2 and Python 3.
    for key, rank1 in ranks1.items():
        # Keep the try body to the lookup alone so that only a genuinely
        # missing key is ignored, not an error raised further down.
        try:
            rank2 = ranks2[key]
        except KeyError:
            continue
        yield key, rank1 - rank2
def spearman_correlation(ranks1, ranks2):
    """Return the Spearman correlation coefficient for two rankings.

    Each argument is either a dict mapping key to score or a sequence of
    ``(key, score)`` pairs. Within each argument the entries are sorted by
    descending score and every key is assigned its 0-based position in that
    order; the coefficient is then computed from the position differences
    of the keys found in both arguments::

        1 - 6 * sum(d ** 2) / (n * (n ** 2 - 1))

    where ``n`` is the number of shared keys. The result ranges from -1.0
    (opposite order) to 1.0 (identical order).

    Positions are assigned over *all* entries of each argument, including
    keys missing from the other one, so for meaningful results remove keys
    present in only one ranking beforehand. Equal scores are not given an
    averaged rank: they keep their relative input order.

    Returns 0.0 when fewer than two keys are shared, because the
    coefficient is undefined in that case.
    """
    def _positions(scores):
        # Iterating a dict yields bare keys, so take its (key, score) pairs
        # explicitly; sequences of pairs are used as given.
        if isinstance(scores, dict):
            scores = scores.items()
        ordered = sorted(scores, key=lambda pair: -pair[1])
        return [(pair[0], ix) for ix, pair in enumerate(ordered)]

    n = 0
    res = 0
    for _, d in _rank_dists(_positions(ranks1), _positions(ranks2)):
        res += d * d
        n += 1
    # n * (n * n - 1) is zero for n == 0 and n == 1: the result is undefined
    # when fewer than two items are ranked in both inputs.
    if n < 2:
        return 0.0
    return 1 - (6 * float(res) / (n * (n * n - 1)))
if __name__ == '__main__':
    # Benchmark driver: build two random score lists over the items 1..N
    # (N is the first command-line argument), then print their Spearman
    # correlation and how long the computation took.
    if len(sys.argv) < 2:
        sys.exit("usage: %s N" % sys.argv[0])
    n = int(sys.argv[1])
    # random.shuffle needs a mutable sequence; on Python 3 a bare range()
    # is immutable, so materialise the item ids as lists first.
    possible_items_x = list(range(1, n + 1))
    possible_items_y = list(range(1, n + 1))
    random.shuffle(possible_items_y)
    random.shuffle(possible_items_x)
    ranks1 = []
    ranks2 = []
    for _ in range(n):
        item1 = possible_items_x.pop()
        item2 = possible_items_y.pop()
        # Scores are drawn uniformly from [1.0, 5.0).
        ranks1.append((item1, random.random() * 4.0 + 1.0))
        ranks2.append((item2, random.random() * 4.0 + 1.0))
    start_time = datetime.datetime.now()
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(spearman_correlation(ranks1, ranks2))
    end_time = datetime.datetime.now()
    elapsed = end_time - start_time
    print("Main took %s" % elapsed)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment