@jseabold
Last active May 8, 2024 21:22
Simulate Inter-Rater Agreement
import numpy as np
import pandas as pd
from scipy import stats
from statsmodels.stats import inter_rater

def fleiss_standard_error(table):
    # large-sample standard error of Fleiss' kappa under the null of no
    # agreement (Fleiss, 1971); not included in statsmodels, whose
    # fleiss_kappa only returns the overall kappa value
    n, k = table.shape  # n_subjects, n_choices
    m = table.sum(axis=1)[0]  # assume every subject has the same number of ratings
    p_bar = table.sum(axis=0) / (n * m)
    q_bar = 1 - p_bar
    return (
        (2 ** .5 / (p_bar.dot(q_bar) * np.sqrt(n * m * (m - 1))))
        * (
            (p_bar.dot(q_bar) ** 2) - np.sum(p_bar * q_bar * (q_bar - p_bar))
        ) ** .5
    )
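
# Illustrative check (added as a sketch; not part of the original gist):
# for a small hand-built count table of 3 subjects, each rated by 4 raters
# into 2 categories, the standard error should be a small positive number.
_example_table = np.array([[3, 1], [2, 2], [4, 0]])
_example_se = fleiss_standard_error(_example_table)  # approximately 0.236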

def simulate_rater_agreement(probs, n_raters, all_nobs, random_state=12345):
    # simulate categorical ratings for each rater from the per-group
    # probabilities in `probs` and compute an agreement test for each
    # sample size in `all_nobs`
    rng = np.random.RandomState(random_state)
    results = []
    for nobs in all_nobs:
        ratings = []
        for rater in range(n_raters):
            # each multinomial draw with n=1 is a one-hot vector, so argmax
            # recovers the sampled category index for every simulated subject
            ratings.append(
                np.vstack([
                    rng.multinomial(1, p, size=nobs) for p in probs
                ]).argmax(axis=1)
            )
        if n_raters > 2:
            # Fleiss' kappa for more than two raters, with a normal-theory
            # Z test against the null of no agreement
            table = inter_rater.aggregate_raters(np.column_stack(ratings))[0]
            test_result = dict()
            test_result['fleiss_kappa'] = inter_rater.fleiss_kappa(table)
            test_result['Z'] = (
                test_result['fleiss_kappa'] / fleiss_standard_error(table)
            )
            test_result['prob'] = 1 - stats.norm.cdf(test_result['Z'])
            test_result['nobs'] = nobs
        else:
            # Cohen's kappa for two raters
            scores = inter_rater.to_table(np.column_stack(ratings))[0]
            test_result = inter_rater.cohens_kappa(scores)
            test_result['nobs'] = nobs
        results.append(test_result)
    return pd.DataFrame(results)
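
# Illustration (added sketch; not in the original gist): aggregate_raters
# turns a subjects-by-raters matrix of category labels into the
# subjects-by-categories count table that fleiss_kappa and
# fleiss_standard_error expect.
_demo_ratings = np.array([
    [1, 1, 2],
    [0, 1, 1],
    [2, 2, 2],
])  # 3 subjects, 3 raters
_demo_table = inter_rater.aggregate_raters(_demo_ratings)[0]
# each row of _demo_table counts how many raters chose each observed category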

# assume that we have 5 possibilities on a Likert scale, 1-5
# assume the raters are never off by more than one category
ordinal_probabilities1 = [0.05, .9, .05, 0, 0]  # 90% accurate on first set
ordinal_probabilities2 = [0, .1, .8, .1, 0]  # 80% accurate on second set
ordinal_probabilities3 = [0, 0, .125, .75, 0.125]  # 75% accurate on last set
probs = np.c_[
    ordinal_probabilities1, ordinal_probabilities2, ordinal_probabilities3
].T

# two raters: Cohen's kappa; drop the distribution objects that
# cohens_kappa carries along so the frame holds only scalar results
simulations_two = simulate_rater_agreement(
    probs, 2, [10, 50, 100, 500, 1000]
).drop(['distribution_zero_null', 'distribution_kappa'], axis=1)

# three raters: Fleiss' kappa with the Z test above
simulations_three = simulate_rater_agreement(
    probs, 3, [10, 50, 100, 500, 1000]
)
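
# Usage sketch (added; not in the original gist): the three-rater frame has
# the columns built above, so we can see how the one-sided p-value of the
# Fleiss kappa Z test shrinks with sample size and find the smallest
# simulated nobs that is significant at the 5% level.
print(simulations_three[['nobs', 'fleiss_kappa', 'Z', 'prob']])
print(simulations_three.loc[simulations_three['prob'] < 0.05, 'nobs'].min())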