Skip to content

Instantly share code, notes, and snippets.

@inaz2
Created November 3, 2023 04:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save inaz2/fca55709d356743f917c5fb10bb33fc6 to your computer and use it in GitHub Desktop.
Save inaz2/fca55709d356743f917c5fb10bb33fc6 to your computer and use it in GitHub Desktop.
Exact multinomial test of goodness-of-fit by Scipy
$ python multinom_test.py
H0: the observed frequeicies follows the expected ones
H1: the observed frequeicies does not follow the expected ones
observed_frequencies = [0, 10, 6, 4, 5, 5]
alpha = 0.05
chisquare: p = 0.064663, H0 is NOT rejected
multinom_test: p = 0.030745, H0 is rejected (# of possible events = 324632)
import numpy as np
from scipy import stats, special
import functools
# https://rpkgs.datanovia.com/rstatix/reference/multinom_test.html
# https://github.com/kassambara/rstatix/blob/v0.7.2/R/multinom_test.R
def multinom_test(x, p=None):
x = np.array(x)
if x.ndim != 1:
raise ValueError("'x' must be a vector")
if p is None:
p = np.repeat(1/len(x), len(x))
if not np.isclose(np.sum(p), 1):
raise ValueError("sum of probabilities must be 1")
if len(x) != len(p):
raise ValueError("'x' and 'p' lengths differ")
size = np.sum(x)
groups = len(x)
num_events = int(special.comb(size + groups - 1, groups - 1))
p_obs = stats.multinomial.pmf(x, size, p)
# use memoization to reduce repeated calls
@functools.cache
def find_vectors(groups, size):
if groups == 1:
mat = size
else:
mat = np.zeros((1, groups - 1))
for i in range(1, size + 1):
mat = np.vstack((mat, find_vectors(groups - 1, i)))
mat = np.hstack((mat, size - np.sum(mat, axis=1).reshape(-1, 1)))
return mat
event_mat = find_vectors(groups, size)
event_prob = stats.multinomial.pmf(event_mat, size, p)
p_val = np.sum(event_prob[event_prob <= p_obs])
return {'pvalue': p_val, 'num_events': num_events}
# observed frequeicies from discrete uniform distribution
# https://biolab.sakura.ne.jp/multinomial-test.html
observed_frequencies = [0, 10, 6, 4, 5, 5]
# significance level
alpha = 0.05
print("""H0: the observed frequeicies follows the expected ones
H1: the observed frequeicies does not follow the expected ones
observed_frequencies = {}
alpha = {}
""".format(observed_frequencies, alpha))
result = stats.chisquare(observed_frequencies)
s = "H0 is rejected" if result.pvalue < alpha else "H0 is NOT rejected"
print(" chisquare: p = {:f}, {}".format(result.pvalue, s))
result = multinom_test(observed_frequencies)
s = "H0 is rejected" if result["pvalue"] < alpha else "H0 is NOT rejected"
print("multinom_test: p = {:f}, {} (# of possible events = {})".format(result["pvalue"], s, result["num_events"]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment