inaz2/goodness_of_fit.py

## gistfile1.txt
$ python goodness_of_fit.py
H0: the observed frequeicies follows the expected ones
H1: the observed frequeicies does not follow the expected ones

observed_frequencies = [0, 10, 6, 4, 5, 5]
alpha = 0.05

           pearson: p = 0.064663, H0 is NOT rejected
    log-likelihood: p = 0.014007, H0 is rejected
     freeman-tukey: p = 0.000233, H0 is rejected
mod-log-likelihood: p = 0.000000, H0 is rejected
            neyman: p = 0.000000, H0 is rejected
      cressie-read: p = 0.051902, H0 is NOT rejected

Description
pearson: Pearson’s chi-squared statistic. In this case, the function is equivalent to chisquare.
log-likelihood: Log-likelihood ratio. Also known as the G-test.
freeman-tukey: Freeman-Tukey statistic.
mod-log-likelihood: Modified log-likelihood ratio.
neyman: Neyman’s statistic.
cressie-read: The power recommended in the paper by Cressie and Read.

## goodness_of_fit.py
import numpy as np
from scipy.stats import power_divergence

# observed frequeicies from discrete uniform distribution
# https://biolab.sakura.ne.jp/multinomial-test.html
observed_frequencies = [0, 10, 6, 4, 5, 5]

# significance level
alpha = 0.05

print("""H0: the observed frequeicies follows the expected ones
H1: the observed frequeicies does not follow the expected ones

observed_frequencies = {}
alpha = {}
""".format(observed_frequencies, alpha))

# replace zero to machine epsilon to avoid zero division error
eps = np.finfo(float).eps

for i, v in enumerate(observed_frequencies):
    if observed_frequencies[i] < 1:
        observed_frequencies[i] = eps

# https://docs.scipy.org/doc/scipy-1.11.3/reference/generated/scipy.stats.power_divergence.html
methods = {
    "pearson": "Pearson’s chi-squared statistic. In this case, the function is equivalent to chisquare.",
    "log-likelihood": "Log-likelihood ratio. Also known as the G-test.",
    "freeman-tukey": "Freeman-Tukey statistic.",
    "mod-log-likelihood": "Modified log-likelihood ratio.",
    "neyman": "Neyman’s statistic.",
    "cressie-read": "The power recommended in the paper by Cressie and Read.",
}

for k, v in methods.items():
    result = power_divergence(observed_frequencies, lambda_=k)
    s = "H0 is rejected" if result.pvalue < alpha else "H0 is NOT rejected"
    print("{:>18s}: p = {:f}, {}".format(k, result.pvalue, s))

print()
print("Description")

for k, v in methods.items():
    print("{}: {}".format(k, v))
	$ python goodness_of_fit.py
	H0: the observed frequeicies follows the expected ones
	H1: the observed frequeicies does not follow the expected ones

	observed_frequencies = [0, 10, 6, 4, 5, 5]
	alpha = 0.05

	pearson: p = 0.064663, H0 is NOT rejected
	log-likelihood: p = 0.014007, H0 is rejected
	freeman-tukey: p = 0.000233, H0 is rejected
	mod-log-likelihood: p = 0.000000, H0 is rejected
	neyman: p = 0.000000, H0 is rejected
	cressie-read: p = 0.051902, H0 is NOT rejected

	Description
	pearson: Pearson’s chi-squared statistic. In this case, the function is equivalent to chisquare.
	log-likelihood: Log-likelihood ratio. Also known as the G-test.
	freeman-tukey: Freeman-Tukey statistic.
	mod-log-likelihood: Modified log-likelihood ratio.
	neyman: Neyman’s statistic.
	cressie-read: The power recommended in the paper by Cressie and Read.
	import numpy as np
	from scipy.stats import power_divergence

	# observed frequeicies from discrete uniform distribution
	# https://biolab.sakura.ne.jp/multinomial-test.html
	observed_frequencies = [0, 10, 6, 4, 5, 5]

	# significance level
	alpha = 0.05

	print("""H0: the observed frequeicies follows the expected ones
	H1: the observed frequeicies does not follow the expected ones

	observed_frequencies = {}
	alpha = {}
	""".format(observed_frequencies, alpha))

	# replace zero to machine epsilon to avoid zero division error
	eps = np.finfo(float).eps

	for i, v in enumerate(observed_frequencies):
	if observed_frequencies[i] < 1:
	observed_frequencies[i] = eps

	# https://docs.scipy.org/doc/scipy-1.11.3/reference/generated/scipy.stats.power_divergence.html
	methods = {
	"pearson": "Pearson’s chi-squared statistic. In this case, the function is equivalent to chisquare.",
	"log-likelihood": "Log-likelihood ratio. Also known as the G-test.",
	"freeman-tukey": "Freeman-Tukey statistic.",
	"mod-log-likelihood": "Modified log-likelihood ratio.",
	"neyman": "Neyman’s statistic.",
	"cressie-read": "The power recommended in the paper by Cressie and Read.",
	}

	for k, v in methods.items():
	result = power_divergence(observed_frequencies, lambda_=k)
	s = "H0 is rejected" if result.pvalue < alpha else "H0 is NOT rejected"
	print("{:>18s}: p = {:f}, {}".format(k, result.pvalue, s))

	print()
	print("Description")

	for k, v in methods.items():
	print("{}: {}".format(k, v))