Skip to content

Instantly share code, notes, and snippets.

@clane9
Last active June 6, 2024 18:35
Show Gist options
  • Save clane9/964c52650f41540c1ad21dad1f247e6f to your computer and use it in GitHub Desktop.
Save clane9/964c52650f41540c1ad21dad1f247e6f to your computer and use it in GitHub Desktop.
Compute batch intraclass correlation coefficients in python
from typing import NamedTuple
import numpy as np
class ICCResult(NamedTuple):
icc1: np.ndarray
icc2: np.ndarray
icc3: np.ndarray
icc1k: np.ndarray
icc2k: np.ndarray
icc3k: np.ndarray
def batch_icc(data: np.ndarray) -> ICCResult:
"""
Compute batch intraclass correlation coefficients.
Args:
x: array of repeated measuresments tables, shape (..., n_subs, n_ratings)
Returns:
Named tuple of batch ICC results.
Example:
>>> import numpy as np
>>> # example wine data copied from pingouin
>>> data = np.array(
... [[1, 2, 0, 1],
... [1, 3, 3, 2],
... [3, 8, 1, 4],
... [6, 4, 3, 3],
... [6, 5, 5, 6],
... [7, 5, 6, 2],
... [8, 7, 7, 9],
... [9, 9, 9, 8]]
... )
>>> icc = batch_icc(data)
>>> print(icc.icc2k)
0.9144500359453631
References:
https://github.com/raphaelvallat/pingouin/blob/b563de7f4c7ea35467d40a675d02a0018c67dc24/src/pingouin/reliability.py#L159
https://github.com/cran/psych/blob/724ef67cfd88aa5e8cf17f297cbf94e0c72f874d/R/ICC.R#L4
https://github.com/TingsterX/ReX/blob/main/R/lme_ICC.R
Koo & Li, 2016
McGraw & Wong, 1996
"""
assert data.ndim >= 2, "data should be at least 2d"
# subject (row) and ratings (column) axes
I, J = -2, -1
N, K = data.shape[I], data.shape[J]
# handle missing values with list-wise deletion
mask = ~np.isnan(data)
row_mask = np.all(mask, axis=J, keepdims=True)
mask = np.repeat(row_mask, axis=J, repeats=K)
col_mask = np.any(mask, axis=I, keepdims=True)
data = np.where(mask, data, 0.0)
# number of effective rows/cols per table might vary depending on missing values
n_row = np.sum(row_mask, axis=I, keepdims=True)
n_col = np.sum(col_mask, axis=J, keepdims=True)
n_total = np.sum(mask, axis=(I, J), keepdims=True)
assert np.min(n_row) >= 2, "at least 2 valid rows for each table are required"
assert np.min(n_col) >= 2, "at least 2 valid columns for each table are required"
grand_mean = np.sum(data, axis=(I, J), keepdims=True) / n_total
row_mean = np.sum(data, axis=J, keepdims=True) / n_col
col_mean = np.sum(data, axis=I, keepdims=True) / n_row
# simple sum and mean of squares, no anova required
ss_row = n_col * np.sum((row_mean - grand_mean) ** 2, axis=I, keepdims=True)
ss_col = n_row * np.sum((col_mean - grand_mean) ** 2, axis=J, keepdims=True)
ss_tot = np.sum((data - grand_mean) ** 2, axis=(I, J), keepdims=True)
ss_err = ss_tot - (ss_row + ss_col)
df_row = n_row - 1
df_col = n_col - 1
df_err = df_row * df_col
msr = ss_row / df_row
msc = ss_col / df_col
mse = ss_err / df_err
msw = (ss_col + ss_err) / (df_col + df_err)
# ICC formulas from pingouin intraclass_corr following notation of Koo & Li, 2016
icc1 = squeeze((msr - msw) / (msr + df_col * msw))
icc2 = squeeze((msr - mse) / (msr + df_col * mse + n_col * (msc - mse) / n_row))
icc3 = squeeze((msr - mse) / (msr + df_col * mse))
icc1k = squeeze((msr - msw) / msr)
icc2k = squeeze((msr - mse) / (msr + (msc - mse) / n_row))
icc3k = squeeze((msr - mse) / msr)
return ICCResult(icc1, icc2, icc3, icc1k, icc2k, icc3k)
def squeeze(x: np.ndarray) -> float | np.ndarray:
x = np.squeeze(x)
if x.ndim == 0:
x = x.item()
return x
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment