Skip to content

Instantly share code, notes, and snippets.

@zorbax
Created March 11, 2019 08:18
Show Gist options
  • Save zorbax/526f3c959cd43381f84e5be3fc3b304c to your computer and use it in GitHub Desktop.
Save zorbax/526f3c959cd43381f84e5be3fc3b304c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
from numpy.random import seed
from numpy.random import randn
from numpy import mean
from numpy import std
from scipy.stats import mannwhitneyu
from scipy.stats import wilcoxon
from scipy.stats import kruskal
from scipy.stats import friedmanchisquare
"""
* Mann-Whitney U test: compare independent data samples;
the nonparametric version of the Student t-test.
* Wilcoxon signed-rank test: compare paired data samples;
the nonparametric version of the paired Student t-test.
* Kruskal-Wallis H test: compare more than two data samples;
the nonparametric version of the ANOVA.
* Friedman test: compare more than two data samples;
the nonparametric version of repeated measures
ANOVA test (two-way ANOVA).
"""
"""
* Mann, H. B., & Whitney, D. R. (1947). On a test of whether one of
two random variables is stochastically larger than the other.
The annals of mathematical statistics, 50-60.
https://projecteuclid.org/euclid.aoms/1177730491
* Wilcoxon, F. (1945). Individual comparisons by ranking methods.
Biometrics bulletin, 1(6), 80-83.
http://sci2s.ugr.es/keel/pdf/algorithm/articulo/wilcoxon1945.pdf
"""
# Mann-Whitney U test: compare two independent samples.
# Reset the random state so every run draws the same numbers.
seed(1)
# Draw 100 standard-normal numbers (mean = 0, std = 1) and discard them.
# The values are unused, but the call advances the generator, so the two
# samples below are taken from draws 101-300 of the seeded stream.
randn(100)
# Generate two sets of univariate observations:
# data1 > mean = 50, std = 5
# data2 > mean = 51, std = 5
data1 = 5 * randn(100) + 50
data2 = 5 * randn(100) + 51
# Summarize the samples.
print('data1: mean=%.3f stdv=%.3f' % (mean(data1), std(data1)))
print('data2: mean=%.3f stdv=%.3f' % (mean(data2), std(data2)))
# Compare the samples. The alternative is stated explicitly so the p-value
# is two-sided regardless of the installed SciPy version's default, which
# matches the "same vs. different distribution" interpretation below.
stat, p = mannwhitneyu(data1, data2, alternative='two-sided')
# stat is the U statistic; p is the probability, under H0, of observing a
# statistic at least as extreme as the one computed.
print('Statistics=%.3f, p=%.3f' % (stat, p))
# Interpret the result at the 5% significance level.
alpha = 0.05
if p > alpha:
    print('Same distribution (fail to reject H0)')
else:
    print('Different distribution (reject H0)')
# Wilcoxon signed-rank test: compare two paired samples.
# Reseed so this section is reproducible on its own, then generate two
# sets of univariate observations of equal length (paired element-wise):
# data1 > mean = 50, std = 5
# data2 > mean = 51, std = 5
seed(1)
data1 = 5 * randn(100) + 50
data2 = 5 * randn(100) + 51
# Compare the samples; the test works on the pairwise differences.
stat, p = wilcoxon(data1, data2)
print('Statistics=%.3f, p=%.3f' % (stat, p))
# Interpret the result at the 5% significance level.
alpha = 0.05
if p > alpha:
    print('Same distribution (fail to reject H0)')
else:
    print('Different distribution (reject H0)')
# Kruskal-Wallis H-test: compare more than two independent samples.
# Reseed so this section is reproducible on its own, then generate three
# independent samples:
# data1 > mean = 50, std = 5
# data2 > mean = 50, std = 5
# data3 > mean = 52, std = 5
seed(1)
data1 = 5 * randn(100) + 50
data2 = 5 * randn(100) + 50
data3 = 5 * randn(100) + 52
# Compare the samples.
stat, p = kruskal(data1, data2, data3)
print('Statistics=%.3f, p=%.3f' % (stat, p))
# Interpret the result at the 5% significance level.
alpha = 0.05
if p > alpha:
    print('Same distributions (fail to reject H0)')
else:
    print('Different distributions (reject H0)')
# Friedman test: compare more than two repeated-measures samples.
# Reseed so this section is reproducible on its own, then generate three
# samples of equal length; the test treats element i of each sample as a
# measurement on the same subject:
# data1 > mean = 50, std = 5
# data2 > mean = 50, std = 5
# data3 > mean = 52, std = 5
seed(1)
data1 = 5 * randn(100) + 50
data2 = 5 * randn(100) + 50
data3 = 5 * randn(100) + 52
# Compare the three samples generated above. (The original call referenced
# data4 and data5, which are never defined.)
stat, p = friedmanchisquare(data1, data2, data3)
print('Statistics=%.3f, p=%.3f' % (stat, p))
# Interpret the result at the 5% significance level.
alpha = 0.05
if p > alpha:
    print('Same distributions (fail to reject H0)')
else:
    print('Different distributions (reject H0)')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment