# mtreviso/compute_kendall_taus.py

## compute_kendall_taus.py
import numpy as np


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    if denominator == 0:
        return np.float64(np.nan)
    return np.float64(numerator / denominator)


def compute_kendall_taus(h, m):
    """
    Compute several variants of Kendall's Tau between two rank arrays.

    Every unordered pair of positions (i, j) is classified as concordant,
    discordant, tied only in ``h``, tied only in ``m``, or tied in both, and
    the resulting counts are combined into the statistics tau_a, tau_b, tau_c,
    tau_10, tau_13, tau_14, tau_23 and the pairwise accuracy acc_23. The
    variants follow the paper "Ties Matter: Modifying Kendall's Tau for Modern
    Metric Meta-Evaluation" (https://arxiv.org/abs/2305.14324).

    Parameters
    ----------
    h, m : array-like
        The input rank arrays. They must contain the same number of elements.
        Inputs that are not 1-D are flattened to 1-D.

    Returns
    -------
    dict
        The pair counts under the keys ``'C'`` (concordant), ``'D'``
        (discordant), ``'Th'`` (tied only in ``h``), ``'Tm'`` (tied only in
        ``m``) and ``'Thm'`` (tied in both), followed by ``'tau_a'``,
        ``'tau_b'``, ``'tau_c'``, ``'tau_10'``, ``'tau_13'``, ``'tau_14'``,
        ``'tau_23'`` and ``'acc_23'``. A statistic whose denominator is zero
        (for example with fewer than two items, or when one input is constant)
        is returned as NaN.

    Raises
    ------
    AssertionError
        If ``h`` and ``m`` do not contain the same number of elements.

    Notes
    -----
    All pairs are compared through n-by-n matrices, so both time and memory
    are O(n^2).
    """
    h = np.asarray(h).ravel()
    m = np.asarray(m).ravel()

    # Raised explicitly (rather than with an ``assert`` statement) so the check
    # still runs under ``python -O``; the exception type is kept for callers.
    if h.size != m.size:
        raise AssertionError("Input arrays must be of the same size")

    # Number of ranked items (not the number of pairs).
    n = h.size

    # Minimum count of unique values in h and m; used by tau_c.
    k = min(len(np.unique(h)), len(np.unique(m)))

    # Entry [i, j] holds the sign of x[i] - x[j] for each input.
    h_sign = np.sign(np.subtract.outer(h, h))
    m_sign = np.sign(np.subtract.outer(m, m))

    # Masks for concordant, discordant, and tied pairs.
    concordant_mask = (h_sign == m_sign) & (h_sign != 0)
    discordant_mask = (h_sign != m_sign) & (h_sign != 0) & (m_sign != 0)
    ties_in_h_mask = (h_sign == 0) & (m_sign != 0)
    ties_in_m_mask = (h_sign != 0) & (m_sign == 0)
    ties_in_both_mask = (h_sign == 0) & (m_sign == 0)

    # Only the strict lower triangle is summed so each unordered pair is
    # counted once and the diagonal (an item paired with itself) is skipped.
    C = np.tril(concordant_mask, -1).sum()
    D = np.tril(discordant_mask, -1).sum()
    Th = np.tril(ties_in_h_mask, -1).sum()
    Tm = np.tril(ties_in_m_mask, -1).sum()
    Thm = np.tril(ties_in_both_mask, -1).sum()

    total_pairs = C + D + Th + Tm + Thm

    # k == 0 only happens for empty input; avoid the Python-level 0 / 0.
    tau_c_denominator = n * n * (k - 1) / k if k else 0.0

    # Tau calculations
    tau_a = _safe_ratio(C - D, total_pairs)
    tau_b = _safe_ratio(C - D, ((C + D + Th) * (C + D + Tm)) ** 0.5)
    # NOTE(review): SciPy's kendalltau(variant='c') uses 2 * (C - D) in the
    # numerator; the formula is kept as originally written here -- confirm
    # which definition is intended.
    tau_c = _safe_ratio(C - D, tau_c_denominator)
    tau_10 = _safe_ratio(C - D - Tm, C + D + Tm)
    tau_13 = _safe_ratio(C - D, C + D)
    tau_14 = _safe_ratio(C - D, C + D + Tm)
    tau_23 = _safe_ratio(C + Thm - D - Th - Tm, total_pairs)

    # Accuracy calculation
    acc_23 = _safe_ratio(C + Thm, total_pairs)

    return {
        'C': C,
        'D': D,
        'Th': Th,
        'Tm': Tm,
        'Thm': Thm,
        'tau_a': tau_a,
        'tau_b': tau_b,
        'tau_c': tau_c,
        'tau_10': tau_10,
        'tau_13': tau_13,
        'tau_14': tau_14,
        'tau_23': tau_23,
        'acc_23': acc_23,
    }
	import numpy as np


	def compute_kendall_taus(h, m):
		"""
		Compute several variants of Kendall's Tau between two rank arrays.

		Every unordered pair of positions (i, j) is classified as concordant,
		discordant, tied only in ``h``, tied only in ``m``, or tied in both, and
		the resulting counts are combined into the statistics tau_a, tau_b,
		tau_c, tau_10, tau_13, tau_14, tau_23 and the pairwise accuracy acc_23.
		The variants follow the paper "Ties Matter: Modifying Kendall's Tau for
		Modern Metric Meta-Evaluation" (https://arxiv.org/abs/2305.14324).

		Parameters
		----------
		h, m : array-like
			The input rank arrays. They must contain the same number of
			elements. Inputs that are not 1-D are flattened to 1-D.

		Returns
		-------
		dict
			The pair counts under the keys ``'C'``, ``'D'``, ``'Th'``, ``'Tm'``
			and ``'Thm'``, followed by ``'tau_a'``, ``'tau_b'``, ``'tau_c'``,
			``'tau_10'``, ``'tau_13'``, ``'tau_14'``, ``'tau_23'`` and
			``'acc_23'``. A statistic whose denominator is zero is returned
			as NaN.

		Raises
		------
		AssertionError
			If ``h`` and ``m`` do not contain the same number of elements.

		Notes
		-----
		All pairs are compared through n-by-n matrices, so both time and
		memory are O(n^2).
		"""

		def ratio(numerator, denominator):
			# NaN instead of a 0 / 0 warning or ZeroDivisionError.
			if denominator == 0:
				return np.float64(np.nan)
			return np.float64(numerator / denominator)

		h = np.asarray(h).ravel()
		m = np.asarray(m).ravel()

		# Raised explicitly so the check still runs under ``python -O``.
		if h.size != m.size:
			raise AssertionError("Input arrays must be of the same size")

		# Number of ranked items (not the number of pairs).
		n = h.size

		# Minimum count of unique values in h and m; used by tau_c.
		k = min(len(np.unique(h)), len(np.unique(m)))

		# Entry [i, j] holds the sign of x[i] - x[j] for each input.
		h_sign = np.sign(np.subtract.outer(h, h))
		m_sign = np.sign(np.subtract.outer(m, m))

		# Masks for concordant, discordant, and tied pairs.
		concordant_mask = (h_sign == m_sign) & (h_sign != 0)
		discordant_mask = (h_sign != m_sign) & (h_sign != 0) & (m_sign != 0)
		ties_in_h_mask = (h_sign == 0) & (m_sign != 0)
		ties_in_m_mask = (h_sign != 0) & (m_sign == 0)
		ties_in_both_mask = (h_sign == 0) & (m_sign == 0)

		# Strict lower triangle: each unordered pair once, diagonal skipped.
		C = np.tril(concordant_mask, -1).sum()
		D = np.tril(discordant_mask, -1).sum()
		Th = np.tril(ties_in_h_mask, -1).sum()
		Tm = np.tril(ties_in_m_mask, -1).sum()
		Thm = np.tril(ties_in_both_mask, -1).sum()

		total_pairs = C + D + Th + Tm + Thm

		# k == 0 only happens for empty input; avoid the Python-level 0 / 0.
		tau_c_denominator = n * n * (k - 1) / k if k else 0.0

		# Tau calculations
		tau_a = ratio(C - D, total_pairs)
		tau_b = ratio(C - D, ((C + D + Th) * (C + D + Tm)) ** 0.5)
		# NOTE(review): SciPy's kendalltau(variant='c') uses 2 * (C - D) in the
		# numerator; confirm which definition is intended.
		tau_c = ratio(C - D, tau_c_denominator)
		tau_10 = ratio(C - D - Tm, C + D + Tm)
		tau_13 = ratio(C - D, C + D)
		tau_14 = ratio(C - D, C + D + Tm)
		tau_23 = ratio(C + Thm - D - Th - Tm, total_pairs)

		# Accuracy calculation
		acc_23 = ratio(C + Thm, total_pairs)

		return {
			'C': C,
			'D': D,
			'Th': Th,
			'Tm': Tm,
			'Thm': Thm,
			'tau_a': tau_a,
			'tau_b': tau_b,
			'tau_c': tau_c,
			'tau_10': tau_10,
			'tau_13': tau_13,
			'tau_14': tau_14,
			'tau_23': tau_23,
			'acc_23': acc_23,
		}