# awni/fleiss_kappa.py

## fleiss_kappa.py
import numpy as np

def fleiss_kappa(ratings):
    """
    Compute Fleiss' kappa, a chance-corrected measure of how well a
    fixed number of reviewers agree when assigning categories.

    Args:
        ratings: An N x R numpy array (or any nested sequence that
            np.asarray turns into one). N is the number of samples
            and R is the number of reviewers. Each entry (n, r) is
            the category assigned to example n by reviewer r.
    Returns:
        Fleiss' kappa score. NaN is returned when the score is
        undefined: there are no samples, there are fewer than two
        reviewers, or every rating falls in the same category.
    Raises:
        ValueError: If ratings is not two-dimensional.

    https://en.wikipedia.org/wiki/Fleiss%27_kappa
    """
    ratings = np.asarray(ratings)
    if ratings.ndim != 2:
        raise ValueError(
            "ratings must be a 2-D (samples x reviewers) array, "
            "got {} dimension(s)".format(ratings.ndim))

    N, R = ratings.shape
    undefined = np.float64(np.nan)
    if N == 0 or R < 2:
        # The agreement term divides by N * R * (R - 1), which is zero here.
        return undefined

    NR = N * R
    categories = set(ratings.ravel().tolist())
    sum_sq = 0
    p_class = 0.0
    for c in categories:
        # Number of reviewers that picked category c, per sample.
        c_sum = np.sum(ratings == c, axis=1)
        sum_sq += np.sum(c_sum ** 2)
        p_class += (np.sum(c_sum) / float(NR)) ** 2

    if p_class == 1:
        # Only one category was ever used, so chance agreement is already
        # total and (1 - p_class) would be a zero denominator.
        return undefined

    # Mean per-sample agreement: sum_n (sum_c n_c^2 - R) / (N * R * (R - 1)).
    P_example = (sum_sq - NR) / float(NR * (R - 1))
    return (P_example - p_class) / (1 - p_class)

if __name__ == "__main__":
    # Demo data: 10 samples, each rated by 14 reviewers with categories 1-5.
    # Each tuple lists, for one sample, how many reviewers chose
    # category 1, 2, 3, 4 and 5 respectively.
    ratings = np.array([
        np.repeat([1, 2, 3, 4, 5], per_category)
        for per_category in (
            (0, 0, 0, 0, 14),
            (0, 2, 6, 4, 2),
            (0, 0, 3, 5, 6),
            (0, 3, 9, 2, 0),
            (2, 2, 8, 1, 1),
            (7, 7, 0, 0, 0),
            (3, 2, 6, 3, 0),
            (2, 5, 3, 2, 2),
            (6, 5, 2, 1, 0),
            (0, 2, 2, 3, 7),
        )
    ])
    fkappa = fleiss_kappa(ratings)
    # Should give: Fleiss' kappa = 0.210
    print("Fleiss' kappa = {:.3f}".format(fkappa))
	import numpy as np

	def fleiss_kappa(ratings):
		"""
		Compute Fleiss' kappa, a chance-corrected measure of how well a
		fixed number of reviewers agree when assigning categories.

		Args:
			ratings: An N x R numpy array (or any nested sequence that
				np.asarray turns into one). N is the number of samples
				and R is the number of reviewers. Each entry (n, r) is
				the category assigned to example n by reviewer r.
		Returns:
			Fleiss' kappa score. NaN is returned when the score is
			undefined: there are no samples, there are fewer than two
			reviewers, or every rating falls in the same category.
		Raises:
			ValueError: If ratings is not two-dimensional.

		https://en.wikipedia.org/wiki/Fleiss%27_kappa
		"""
		ratings = np.asarray(ratings)
		if ratings.ndim != 2:
			raise ValueError(
				"ratings must be a 2-D (samples x reviewers) array, "
				"got {} dimension(s)".format(ratings.ndim))

		N, R = ratings.shape
		undefined = np.float64(np.nan)
		if N == 0 or R < 2:
			# The agreement term divides by N * R * (R - 1), which is zero here.
			return undefined

		NR = N * R
		categories = set(ratings.ravel().tolist())
		sum_sq = 0
		p_class = 0.0
		for c in categories:
			# Number of reviewers that picked category c, per sample.
			c_sum = np.sum(ratings == c, axis=1)
			sum_sq += np.sum(c_sum ** 2)
			p_class += (np.sum(c_sum) / float(NR)) ** 2

		if p_class == 1:
			# Only one category was ever used, so chance agreement is already
			# total and (1 - p_class) would be a zero denominator.
			return undefined

		# Mean per-sample agreement: sum_n (sum_c n_c^2 - R) / (N * R * (R - 1)).
		P_example = (sum_sq - NR) / float(NR * (R - 1))
		return (P_example - p_class) / (1 - p_class)

	if __name__ == "__main__":
		# Demo data: 10 samples, each rated by 14 reviewers with categories 1-5.
		# Each row repeats a category once per reviewer that chose it.
		ratings = np.array(
			[[5]*14,
			 [2]*2 + [3]*6 + [4]*4 + [5]*2,
			 [3]*3 + [4]*5 + [5]*6,
			 [2]*3 + [3]*9 + [4]*2,
			 [1]*2 + [2]*2 + [3]*8 + [4]*1 + [5]*1,
			 [1]*7 + [2]*7,
			 [1]*3 + [2]*2 + [3]*6 + [4]*3,
			 [1]*2 + [2]*5 + [3]*3 + [4]*2 + [5]*2,
			 [1]*6 + [2]*5 + [3]*2 + [4]*1,
			 [2]*2 + [3]*2 + [4]*3 + [5]*7])
		fkappa = fleiss_kappa(ratings)
		# Should give: Fleiss' kappa = 0.210
		print("Fleiss' kappa = {:.3f}".format(fkappa))