inaz2/multinom_test.py

## gistfile1.txt
$ python multinom_test.py
H0: the observed frequeicies follows the expected ones
H1: the observed frequeicies does not follow the expected ones
observed_frequencies = [0, 10, 6, 4, 5, 5]
alpha = 0.05

    chisquare: p = 0.064663, H0 is NOT rejected
multinom_test: p = 0.030745, H0 is rejected (# of possible events = 324632)

## multinom_test.py
import numpy as np
from scipy import stats, special
import functools

# https://rpkgs.datanovia.com/rstatix/reference/multinom_test.html
# https://github.com/kassambara/rstatix/blob/v0.7.2/R/multinom_test.R
def multinom_test(x, p=None):
    x = np.array(x)
    if x.ndim != 1:
        raise ValueError("'x' must be a vector")
    if p is None:
        p = np.repeat(1/len(x), len(x))
    if not np.isclose(np.sum(p), 1):
        raise ValueError("sum of probabilities must be 1")
    if len(x) != len(p):
        raise ValueError("'x' and 'p' lengths differ")

    size = np.sum(x)
    groups = len(x)
    num_events = int(special.comb(size + groups - 1, groups - 1))
    p_obs = stats.multinomial.pmf(x, size, p)

    # use memoization to reduce repeated calls
    @functools.cache
    def find_vectors(groups, size):
        if groups == 1:
            mat = size
        else:
            mat = np.zeros((1, groups - 1))
            for i in range(1, size + 1):
                mat = np.vstack((mat, find_vectors(groups - 1, i)))
            mat = np.hstack((mat, size - np.sum(mat, axis=1).reshape(-1, 1)))
        return mat

    event_mat = find_vectors(groups, size)
    event_prob = stats.multinomial.pmf(event_mat, size, p)
    p_val = np.sum(event_prob[event_prob <= p_obs])
    return {'pvalue': p_val, 'num_events': num_events}

# observed frequeicies from discrete uniform distribution
# https://biolab.sakura.ne.jp/multinomial-test.html
observed_frequencies = [0, 10, 6, 4, 5, 5]

# significance level
alpha = 0.05

print("""H0: the observed frequeicies follows the expected ones
H1: the observed frequeicies does not follow the expected ones
observed_frequencies = {}
alpha = {}
""".format(observed_frequencies, alpha))

result = stats.chisquare(observed_frequencies)
s = "H0 is rejected" if result.pvalue < alpha else "H0 is NOT rejected"
print("    chisquare: p = {:f}, {}".format(result.pvalue, s))

result = multinom_test(observed_frequencies)
s = "H0 is rejected" if result["pvalue"] < alpha else "H0 is NOT rejected"
print("multinom_test: p = {:f}, {} (# of possible events = {})".format(result["pvalue"], s, result["num_events"]))
	$ python multinom_test.py
	H0: the observed frequeicies follows the expected ones
	H1: the observed frequeicies does not follow the expected ones
	observed_frequencies = [0, 10, 6, 4, 5, 5]
	alpha = 0.05

	chisquare: p = 0.064663, H0 is NOT rejected
	multinom_test: p = 0.030745, H0 is rejected (# of possible events = 324632)
	import numpy as np
	from scipy import stats, special
	import functools

	# https://rpkgs.datanovia.com/rstatix/reference/multinom_test.html
	# https://github.com/kassambara/rstatix/blob/v0.7.2/R/multinom_test.R
	def multinom_test(x, p=None):
	x = np.array(x)
	if x.ndim != 1:
	raise ValueError("'x' must be a vector")
	if p is None:
	p = np.repeat(1/len(x), len(x))
	if not np.isclose(np.sum(p), 1):
	raise ValueError("sum of probabilities must be 1")
	if len(x) != len(p):
	raise ValueError("'x' and 'p' lengths differ")

	size = np.sum(x)
	groups = len(x)
	num_events = int(special.comb(size + groups - 1, groups - 1))
	p_obs = stats.multinomial.pmf(x, size, p)

	# use memoization to reduce repeated calls
	@functools.cache
	def find_vectors(groups, size):
	if groups == 1:
	mat = size
	else:
	mat = np.zeros((1, groups - 1))
	for i in range(1, size + 1):
	mat = np.vstack((mat, find_vectors(groups - 1, i)))
	mat = np.hstack((mat, size - np.sum(mat, axis=1).reshape(-1, 1)))
	return mat

	event_mat = find_vectors(groups, size)
	event_prob = stats.multinomial.pmf(event_mat, size, p)
	p_val = np.sum(event_prob[event_prob <= p_obs])
	return {'pvalue': p_val, 'num_events': num_events}

	# observed frequeicies from discrete uniform distribution
	# https://biolab.sakura.ne.jp/multinomial-test.html
	observed_frequencies = [0, 10, 6, 4, 5, 5]

	# significance level
	alpha = 0.05

	print("""H0: the observed frequeicies follows the expected ones
	H1: the observed frequeicies does not follow the expected ones
	observed_frequencies = {}
	alpha = {}
	""".format(observed_frequencies, alpha))

	result = stats.chisquare(observed_frequencies)
	s = "H0 is rejected" if result.pvalue < alpha else "H0 is NOT rejected"
	print(" chisquare: p = {:f}, {}".format(result.pvalue, s))

	result = multinom_test(observed_frequencies)
	s = "H0 is rejected" if result["pvalue"] < alpha else "H0 is NOT rejected"
	print("multinom_test: p = {:f}, {} (# of possible events = {})".format(result["pvalue"], s, result["num_events"]))