Skip to content

Instantly share code, notes, and snippets.

@Martins6
Created November 18, 2022 13:09
Show Gist options
  • Save Martins6/bec501419aeb287c9bee6e43765c2cf9 to your computer and use it in GitHub Desktop.
Save Martins6/bec501419aeb287c9bee6e43765c2cf9 to your computer and use it in GitHub Desktop.
Calculate the multinomial goodness of fit through Log Likelihood Ratio test statistic with the null distribution calculated via Monte Carlo simulation.
def multinomial_goodness_of_fit(
f_obs,
p_exp=None,
b=1000,
delta=0
) -> list:
"""
Calculate the multinomial goodness of fit through Log Likelihood Ratio test statistic with the
null distribution calculated via Monte Carlo simulation.
Args:
f_obs (np.array): The observed frequency in each category.
p_exp (np.array): Defaults to None. The expected probability in each category. Our H0, in other words.
If None it give equal probability to every category.
b (int): Positive number. The number of times to generate the samples from the
multinomial and calculate our test statistic.
delta (float): Positive number to deal with categories that didn't occur.
Returns:
statistic (float): the observed choosen statistic.
p-value (float): the p-value of the observed statistic.
References:
https://www.biostat.wisc.edu/~kbroman/teaching/labstat/fourth/notes02.pdf
https://cran.r-project.org/web/packages/XNomial/vignettes/XNomial.html
https://en.wikipedia.org/wiki/G-test
https://en.wikipedia.org/wiki/Multinomial_test
"""
f_obs_delta = f_obs + delta
def llr_stat(f_obs, p_exp, p = False):
results = np.zeros(f_obs.shape)
for i, value in enumerate(f_obs):
if (f_obs[i] != 0) & (p_exp[i] > 0):
results[i] = 2 * (f_obs[i] * np.log( (f_obs[i]/f_obs.sum()) / p_exp[i]))
return results.sum()
rng = np.random.default_rng()
if p_exp is None:
p_exp = np.repeat(1/f_obs.size, f_obs.size)
mult_samples = rng.multinomial(f_obs.sum(), p_exp, size=b) + delta
test_statistic_simulation_result = np.zeros((b,))
for i, f_exp_sample in enumerate(mult_samples):
statistic_sample = llr_stat(f_exp_sample, p_exp)
test_statistic_simulation_result[i] = statistic_sample
obs_statistics = llr_stat(f_obs_delta, p_exp)
p_value = (test_statistic_simulation_result >= obs_statistics).mean()
return obs_statistics, p_value, test_statistic_simulation_result
def multinomial_adjust(arr, c=100):
arr = arr * c
arr[arr == 0] = 1
return arr
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment