# zorbax/nonparametric_tests.py

## nonparametric_tests.py
#!/usr/bin/env python3

from numpy.random import seed
from numpy.random import randn
from numpy import mean
from numpy import std
from scipy.stats import mannwhitneyu
from scipy.stats import wilcoxon
from scipy.stats import kruskal
from scipy.stats import friedmanchisquare

"""
* Mann-Whitney U test: compare independent data samples;
            the nonparametric version of the Student t-test.

* Wilcoxon signed-rank test: compare paired data samples;
            the nonparametric version of the paired Student t-test.

* Kruskal-Wallis H test: compare more than two data samples;
            the nonparametric version of the ANOVA.

* Friedman test: compare more than two data samples;
            the nonparametric version of repeated measures
            ANOVA test (two-way ANOVA).
"""

"""
* Mann, H. B., & Whitney, D. R. (1947). On a test of whether one of
    two random variables is stochastically larger than the other.
    The annals of mathematical statistics, 50-60.
    https://projecteuclid.org/euclid.aoms/1177730491

* Wilcoxon, F. (1945). Individual comparisons by ranking methods.
    Biometrics bulletin, 1(6), 80-83.
    http://sci2s.ugr.es/keel/pdf/algorithm/articulo/wilcoxon1945.pdf
"""

# Mann-Whitney U test
# Fix the NumPy random state so every run draws the same numbers.
seed(1)

# Draw 100 standard-normal values (mean = 0, std = 1) and discard them;
# the call only advances the generator before the samples are drawn.
randn(100)

# Two sets of univariate observations, drawn in this order:
#   data1 > mean = 50, std = 5
#   data2 > mean = 51, std = 5
data1, data2 = 5 * randn(100) + 50, 5 * randn(100) + 51

# Descriptive summary of both samples.
print('data1: mean=%.3f stdv=%.3f' % (mean(data1), std(data1)))
print('data2: mean=%.3f stdv=%.3f' % (mean(data2), std(data2)))

# Significance level the p-value is compared against.
alpha = 0.05

# Run the test and report the statistic together with its p-value.
stat, p = mannwhitneyu(data1, data2)
print('Statistics=%.3f, p=%.3f' % (stat, p))

# H0 is kept when p exceeds alpha and rejected otherwise.
print(
    'Same distribution (fail to reject H0)'
    if p > alpha
    else 'Different distribution (reject H0)'
)

# Wilcoxon signed-rank test
# Re-seed, then draw the two samples (means 50 and 51, std 5) that the
# test compares element by element.
seed(1)
data1, data2 = 5 * randn(100) + 50, 5 * randn(100) + 51

# Significance level the p-value is compared against.
alpha = 0.05

# Run the test and report the statistic together with its p-value.
stat, p = wilcoxon(data1, data2)
print('Statistics=%.3f, p=%.3f' % (stat, p))

# H0 is kept when p exceeds alpha and rejected otherwise.
print(
    'Same distribution (fail to reject H0)'
    if p > alpha
    else 'Different distribution (reject H0)'
)

# Kruskal-Wallis H-test
# Re-seed, then draw three samples one after another with std 5 and
# means 50, 50 and 52 respectively.
seed(1)
data1, data2, data3 = (5 * randn(100) + centre for centre in (50, 50, 52))

# Significance level the p-value is compared against.
alpha = 0.05

# Run the test and report the statistic together with its p-value.
stat, p = kruskal(data1, data2, data3)
print('Statistics=%.3f, p=%.3f' % (stat, p))

# H0 is kept when p exceeds alpha and rejected otherwise.
print(
    'Same distributions (fail to reject H0)'
    if p > alpha
    else 'Different distributions (reject H0)'
)

# Friedman test
# Generate three samples of equal length (std 5; means 50, 50 and 52).
# The test matches the samples by position, i.e. element i of each sample
# is treated as one repeated measurement.
seed(1)
data1 = 5 * randn(100) + 50
data2 = 5 * randn(100) + 50
data3 = 5 * randn(100) + 52

# Compare the three samples generated above.
stat, p = friedmanchisquare(data1, data2, data3)

print('Statistics=%.3f, p=%.3f' % (stat, p))

# Interpret: H0 is kept when p exceeds alpha and rejected otherwise.
alpha = 0.05

if p > alpha:
    print('Same distributions (fail to reject H0)')
else:
    print('Different distributions (reject H0)')
	#!/usr/bin/env python3
# Second listing of the nonparametric-tests script, re-indented so that it
# parses: every statement sits at module level and each if/else body is
# indented under its header.

from numpy.random import seed
from numpy.random import randn
from numpy import mean
from numpy import std
from scipy.stats import mannwhitneyu
from scipy.stats import wilcoxon
from scipy.stats import kruskal
from scipy.stats import friedmanchisquare

"""
* Mann-Whitney U test: compare independent data samples;
            the nonparametric version of the Student t-test.

* Wilcoxon signed-rank test: compare paired data samples;
            the nonparametric version of the paired Student t-test.

* Kruskal-Wallis H test: compare more than two data samples;
            the nonparametric version of the ANOVA.

* Friedman test: compare more than two data samples;
            the nonparametric version of repeated measures
            ANOVA test (two-way ANOVA).
"""

"""
* Mann, H. B., & Whitney, D. R. (1947). On a test of whether one of
    two random variables is stochastically larger than the other.
    The annals of mathematical statistics, 50-60.
    https://projecteuclid.org/euclid.aoms/1177730491

* Wilcoxon, F. (1945). Individual comparisons by ranking methods.
    Biometrics bulletin, 1(6), 80-83.
    http://sci2s.ugr.es/keel/pdf/algorithm/articulo/wilcoxon1945.pdf
"""

# reset random state so every run draws the same numbers
seed(1)

# draw and discard 100 Gaussian random numbers (mean = 0, std = 1);
# this only advances the generator before the samples are drawn
randn(100)

# generate two sets of univariate observations
# data1 > mean = 50, std = 5
# data2 > mean = 51, std = 5
data1 = 5 * randn(100) + 50
data2 = 5 * randn(100) + 51

# summarize
print('data1: mean=%.3f stdv=%.3f' % (mean(data1), std(data1)))
print('data2: mean=%.3f stdv=%.3f' % (mean(data2), std(data2)))

# Mann-Whitney U test
# compare samples
stat, p = mannwhitneyu(data1, data2)

# report the test statistic together with its p-value
print('Statistics=%.3f, p=%.3f' % (stat, p))

# interpret: H0 is kept when p exceeds alpha and rejected otherwise
alpha = 0.05

if p > alpha:
    print('Same distribution (fail to reject H0)')
else:
    print('Different distribution (reject H0)')

# Wilcoxon signed-rank test
# generate two sets of univariate observations
seed(1)
data1 = 5 * randn(100) + 50
data2 = 5 * randn(100) + 51

# compare samples
stat, p = wilcoxon(data1, data2)

print('Statistics=%.3f, p=%.3f' % (stat, p))

# interpret
alpha = 0.05

if p > alpha:
    print('Same distribution (fail to reject H0)')
else:
    print('Different distribution (reject H0)')

# Kruskal-Wallis H-test
# generate three independent samples
seed(1)
data1 = 5 * randn(100) + 50
data2 = 5 * randn(100) + 50
data3 = 5 * randn(100) + 52

# compare samples
stat, p = kruskal(data1, data2, data3)

print('Statistics=%.3f, p=%.3f' % (stat, p))

# interpret
alpha = 0.05

if p > alpha:
    print('Same distributions (fail to reject H0)')
else:
    print('Different distributions (reject H0)')

# Friedman test
# generate three samples of equal length; the test matches them by position,
# i.e. element i of each sample is treated as one repeated measurement
seed(1)
data1 = 5 * randn(100) + 50
data2 = 5 * randn(100) + 50
data3 = 5 * randn(100) + 52

# compare the three samples generated above
stat, p = friedmanchisquare(data1, data2, data3)

print('Statistics=%.3f, p=%.3f' % (stat, p))

# interpret
alpha = 0.05

if p > alpha:
    print('Same distributions (fail to reject H0)')
else:
    print('Different distributions (reject H0)')