Skip to content

Instantly share code, notes, and snippets.

@bwhite
Created September 15, 2012 03:23
Show Gist options
  • Save bwhite/3726239 to your computer and use it in GitHub Desktop.
Save bwhite/3726239 to your computer and use it in GitHub Desktop.
Ranking Metrics
"""Information Retrieval metrics
Useful Resources:
http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
http://www.nii.ac.jp/TechReports/05-014E.pdf
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
Learning to Rank for Information Retrieval (Tie-Yan Liu)
"""
import numpy as np
def mean_reciprocal_rank(rs):
"""Score is reciprocal of the rank of the first relevant item
First element is 'rank 1'. Relevance is binary (nonzero is relevant).
Example from http://en.wikipedia.org/wiki/Mean_reciprocal_rank
>>> rs = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
>>> mean_reciprocal_rank(rs)
0.61111111111111105
>>> rs = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]])
>>> mean_reciprocal_rank(rs)
0.5
>>> rs = [[0, 0, 0, 1], [1, 0, 0], [1, 0, 0]]
>>> mean_reciprocal_rank(rs)
0.75
Args:
rs: Iterator of relevance scores (list or numpy) in rank order
(first element is the first item)
Returns:
Mean reciprocal rank
"""
rs = (np.asarray(r).nonzero()[0] for r in rs)
return np.mean([1. / (r[0] + 1) if r.size else 0. for r in rs])
def r_precision(r):
"""Score is precision after all relevant documents have been retrieved
Relevance is binary (nonzero is relevant).
>>> r = [0, 0, 1]
>>> r_precision(r)
0.33333333333333331
>>> r = [0, 1, 0]
>>> r_precision(r)
0.5
>>> r = [1, 0, 0]
>>> r_precision(r)
1.0
Args:
r: Relevance scores (list or numpy) in rank order
(first element is the first item)
Returns:
R Precision
"""
r = np.asarray(r) != 0
z = r.nonzero()[0]
if not z.size:
return 0.
return np.mean(r[:z[-1] + 1])
def precision_at_k(r, k):
"""Score is precision @ k
Relevance is binary (nonzero is relevant).
>>> r = [0, 0, 1]
>>> precision_at_k(r, 1)
0.0
>>> precision_at_k(r, 2)
0.0
>>> precision_at_k(r, 3)
0.33333333333333331
>>> precision_at_k(r, 4)
Traceback (most recent call last):
File "<stdin>", line 1, in ?
ValueError: Relevance score length < k
Args:
r: Relevance scores (list or numpy) in rank order
(first element is the first item)
Returns:
Precision @ k
Raises:
ValueError: len(r) must be >= k
"""
assert k >= 1
r = np.asarray(r)[:k] != 0
if r.size != k:
raise ValueError('Relevance score length < k')
return np.mean(r)
def average_precision(r):
"""Score is average precision (area under PR curve)
Relevance is binary (nonzero is relevant).
>>> r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1]
>>> delta_r = 1. / sum(r)
>>> sum([sum(r[:x + 1]) / (x + 1.) * delta_r for x, y in enumerate(r) if y])
0.7833333333333333
>>> average_precision(r)
0.78333333333333333
Args:
r: Relevance scores (list or numpy) in rank order
(first element is the first item)
Returns:
Average precision
"""
r = np.asarray(r) != 0
out = [precision_at_k(r, k + 1) for k in range(r.size) if r[k]]
if not out:
return 0.
return np.mean(out)
def mean_average_precision(rs):
"""Score is mean average precision
Relevance is binary (nonzero is relevant).
>>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1]]
>>> mean_average_precision(rs)
0.78333333333333333
>>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]]
>>> mean_average_precision(rs)
0.39166666666666666
Args:
rs: Iterator of relevance scores (list or numpy) in rank order
(first element is the first item)
Returns:
Mean average precision
"""
return np.mean([average_precision(r) for r in rs])
def dcg_at_k(r, k, method=0):
"""Score is discounted cumulative gain (dcg)
Relevance is positive real values. Can use binary
as the previous methods.
Example from
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
>>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
>>> dcg_at_k(r, 1)
3.0
>>> dcg_at_k(r, 1, method=1)
3.0
>>> dcg_at_k(r, 2)
5.0
>>> dcg_at_k(r, 2, method=1)
4.2618595071429155
>>> dcg_at_k(r, 10)
9.6051177391888114
>>> dcg_at_k(r, 11)
9.6051177391888114
Args:
r: Relevance scores (list or numpy) in rank order
(first element is the first item)
k: Number of results to consider
method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
Returns:
Discounted cumulative gain
"""
r = np.asfarray(r)[:k]
if r.size:
if method == 0:
return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
elif method == 1:
return np.sum(r / np.log2(np.arange(2, r.size + 2)))
else:
raise ValueError('method must be 0 or 1.')
return 0.
def ndcg_at_k(r, k, method=0):
"""Score is normalized discounted cumulative gain (ndcg)
Relevance is positive real values. Can use binary
as the previous methods.
Example from
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
>>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
>>> ndcg_at_k(r, 1)
1.0
>>> r = [2, 1, 2, 0]
>>> ndcg_at_k(r, 4)
0.9203032077642922
>>> ndcg_at_k(r, 4, method=1)
0.96519546960144276
>>> ndcg_at_k([0], 1)
0.0
>>> ndcg_at_k([1], 2)
1.0
Args:
r: Relevance scores (list or numpy) in rank order
(first element is the first item)
k: Number of results to consider
method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
Returns:
Normalized discounted cumulative gain
"""
dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
if not dcg_max:
return 0.
return dcg_at_k(r, k, method) / dcg_max
if __name__ == "__main__":
import doctest
doctest.testmod()
@jun8git
Copy link

jun8git commented Nov 15, 2022

method variable distinguishes between two ways of giving weights to relevances while calculating DCG

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment