Last active
February 19, 2024 21:37
-
-
Save nir-krakauer/7d308d1501bf2d07b30eb4a9a3afbd0a to your computer and use it in GitHub Desktop.
Shapiro-Wilk test in Octave
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Copyright (C) 2017 Nir Krakauer | |
## | |
## This program is free software; you can redistribute it and/or modify | |
## it under the terms of the GNU General Public License as published by | |
## the Free Software Foundation; either version 3 of the License, or | |
## (at your option) any later version. | |
## | |
## This program is distributed in the hope that it will be useful, | |
## but WITHOUT ANY WARRANTY; without even the implied warranty of | |
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
## GNU General Public License for more details. | |
## | |
## You should have received a copy of the GNU General Public License | |
## along with this program; If not, see <http://www.gnu.org/licenses/>. | |
## -*- texinfo -*- | |
## @deftypefn {Function File} {[@var{pval}, @var{W}] = } @ | |
## shapiro_wilk_test (@var{x}) | |
## | |
## Test the hypothesis that @var{x} is from a normal distribution | |
## using the Shapiro-Wilk test. If the returned @var{pval} is small, reject | |
## the hypothesis at the @var{pval}*100% level. | |
## | |
## The test statistic @var{W} is based on the correlation between the order statistics of the data | |
## with those expected from a normal distribution with the same mean. | |
## | |
## References: @* | |
## Patrick Royston (1992), "Approximating the Shapiro-Wilk W-test for non-normality", Statistics and Computing, 2(3): 117-119 @* | |
## Patrick Royston (1995), "AS R94: a remark on Algorithm AS 181: the W-test for normality", Journal of the Royal Statistical Society. Series C (Applied Statistics), 44(4): 547-551. | |
## | |
## @end deftypefn | |
## Author: Nir Krakauer <mail@nirkrakauer.net> | |
function [pval, W] = shapiro_wilk_test (x) | |
#the W statistic is sum(A .* x).^2 / sumsq(center(x)) | |
#where A is a vector of expected values of order statistics from a standard normal distribution | |
#once x is ordered, A is an antisymmetric vector, with A(n+1-i) = -A(i), 1 <= i <= n, and A'*A = 1 | |
#because of this, only half of A needs to be calculated; we consider a = A(n:-1:(n+1-floor(n/2))) | |
n = numel (x); | |
nh = floor (n/2); | |
if n < 3 | |
error ('The input vector x should have at least 3 elements') | |
endif | |
if n == 3 | |
a = sqrt(0.5); #exact | |
else | |
#get an initial estimate of a | |
m = -norminv(((1:nh)' - 0.375) / (n + 0.25)); | |
msq = 2*sumsq(m); | |
mrms = sqrt (msq); | |
#correction factors for initial elements of a (derived to be good approximations for 4 <= n <= 1000) | |
rsn = 1 / sqrt(n); | |
ac1 = m(1)/mrms + polyval ([-2.706056 4.434685 -2.07119 -0.147981 0.221157 0], rsn); | |
if n <= 5 | |
phi = (msq - 2*m(1)^2) / (1 - 2*ac1^2); | |
a = [ac1; m(2:nh)/sqrt(phi)]; | |
else | |
ac2 = m(2)/mrms + polyval ([-3.582633 5.682633 -1.752461 -0.293762 0.042981 0], rsn); | |
phi = (msq - 2*m(1)^2 - 2*m(2)^2) / (1 - 2*ac1^2 - 2*ac2^2); | |
a = [ac1; ac2; m(3:nh)/sqrt(phi)]; | |
endif | |
endif | |
x = sort (x(:)); | |
xm = mean (x); | |
W = sum(a .* (x(n:-1:n+1-nh) - x(1:nh))).^2 / sumsq(x - xm); | |
#p value for the W statistic being as small as it is for a normal distribution | |
if n == 3 | |
pval = (6/pi) * (asin(sqrt(W)) - asin(sqrt(0.75))); | |
elseif n <= 11 | |
gamma = 0.459*n - 2.273; | |
w = -log(gamma - log(1 - W)); | |
mu = polyval ([-0.0006714 0.025054 -0.39978 0.5440], gamma); | |
sigma = exp (polyval([-0.0020322 0.062767 -0.77857 1.3822], n)); | |
pval = normcdf ((mu - w) / sigma); | |
else | |
nl = log (n); | |
w = log (1 - W); | |
mu = polyval ([0.0038915 -0.083751 -0.31082 -1.5861], nl); | |
sigma = exp (polyval ([0.0030302 -0.082676 -0.4803], nl)); | |
pval = normcdf ((mu - w) / sigma); | |
endif | |
%!shared x, W, pval | |
%! x = [139 157 175 256 344 413 503 577 614 655 954 1392 1557 1648 1690 1994 2174 2206 3245 3510 3571 4354 4980 6084 8351]; #test data adapted from royston95 | |
%! [pval, W] = shapiro_wilk_test (x); | |
%!assert (pval, 9.14E-4, 0.01E-4) | |
%!assert (W, 0.83467, 0.00001) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment