Created
September 14, 2015 15:45
-
-
Save schluppeck/e7635dcf0e80ca54efb0 to your computer and use it in GitHub Desktop.
Matlab code of Hartigans Dip Test [FORTRAN], as implemented by Ferenc Mechler, 2002; and retrieved from http://nicprice.net/diptest/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function [dip,xl,xu, ifault, gcm, lcm, mn, mj] = HartigansDipTest(xpdf) | |
% function [dip,xl,xu, ifault, gcm, lcm, mn, mj]=HartigansDipTest(xpdf) | |
% | |
% This is a direct translation by F. Mechler (August 27 2002) | |
% into MATLAB from the original FORTRAN code of Hartigan's Subroutine DIPTST algorithm | |
% Ref: Algorithm AS 217 APPL. STATIST. (1985) Vol. 34. No.3 pg 322-325 | |
% | |
% Appended by F. Mechler (September 2 2002) to deal with a perfectly unimodal input | |
% This check the original Hartigan algorithm omitted, which leads to an infinite cycle | |
% | |
% HartigansDipTest, like DIPTST, does the dip calculation for an ordered vector XPDF using | |
% the greatest convex minorant (gcm) and the least concave majorant (lcm), | |
% skipping through the data using the change points of these distributions. | |
% It returns the 'DIP' statistic, and 7 more optional results, which include | |
% the modal interval (XL,XU), ann error flag IFAULT (>0 flags an error) | |
% as well as the minorant and majorant fits GCM, LCM, and the corresponding support indices MN, and MJ | |
% sort X in increasing order in column vector | |
x=sort(xpdf(:)); | |
N=length(x); | |
mn=zeros(size(x)); | |
mj=zeros(size(x)); | |
lcm=zeros(size(x)); | |
gcm=zeros(size(x)); | |
ifault=0; | |
% Check that N is positive | |
if (N<=0) | |
ifault=1; | |
fprintf(1,'\nHartigansDipTest. InputError : ifault=%d\n',ifault); | |
return; | |
end; | |
% Check if N is one | |
if (N==1) | |
xl=x(1); | |
xu=x(N); | |
dip=0.0; | |
ifault=2; | |
fprintf(1,'\nHartigansDipTest. InputError : ifault=%d\n',ifault); | |
return; | |
end; | |
if (N>1) | |
% Check that X is sorted | |
if (x ~= sort(x)) | |
ifault=3; | |
fprintf(1,'\nHartigansDipTest. InputError : ifault=%d\n',ifault); | |
return; | |
end; | |
% Check for all values of X identical OR for case 1<N<4 | |
if ~((x(N)>x(1)) & (4<=N)) | |
xl=x(1); | |
xu=x(N); | |
dip=0.0; | |
ifault=4; | |
fprintf(1,'\nHartigansDipTest. InputError : ifault=%d\n',ifault); | |
return; | |
end; | |
end; | |
% Check if X is perfectly unimodal | |
% Hartigan's original DIPTST algorithm did not check for this condition | |
% and DIPTST runs into infinite cycle for a unimodal input | |
% The condition that the input is unimodal is equivalent to having | |
% at most 1 sign change in the second derivative of the input p.d.f. | |
xsign=-sign(diff(diff(x))); | |
% This condition check below works even | |
% if the unimodal p.d.f. has its mode in the very first or last point of the input | |
% because then the boolean argument is Empty Matrix, and ANY returns 1 for an Empty Matrix | |
posi=find(xsign>0); | |
negi=find(xsign<0); | |
if isempty(posi) | isempty(negi) | all(posi<min(negi)) | |
% A unimodal function is its own best unimodal approximation, with a zero corresponding dip | |
xl=x(1); | |
xu=x(N); | |
dip=0.0; | |
ifault=5; | |
%fprintf(1,'\n The input is a perfectly UNIMODAL input function\n'); | |
return; | |
end; | |
% LOW contains the index of the current estimate of the lower end of the modal interval | |
% HIGH contains the index of the current estimate of the upper end of the modal interval | |
fn=N; | |
low=1; | |
high=N; | |
dip=1/fn; | |
xl=x(low); | |
xu=x(high); | |
% establish the indices over which combination is necessary for the convex minorant fit | |
mn(1)=1; | |
for j=2:N | |
mn(j)=j-1; | |
% here is the beginning of a while loop | |
mnj=mn(j); | |
mnmnj=mn(mnj); | |
a=mnj-mnmnj; | |
b=j-mnj; | |
while ~( (mnj==1) | ((x(j)-x(mnj))*a < (x(mnj)-x(mnmnj))*b)) | |
mn(j)=mnmnj; | |
mnj=mn(j); | |
mnmnj=mn(mnj); | |
a=mnj-mnmnj; | |
b=j-mnj; | |
end; % here is the end of the while loop | |
end; % end for j=2:N | |
% establish the indices over which combination is necessary for the concave majorant fit | |
mj(N)=N; | |
na=N-1; | |
for jk=1:na | |
k=N-jk; | |
mj(k)=k+1; | |
% here is the beginning of a while loop | |
mjk=mj(k); | |
mjmjk=mj(mjk); | |
a=mjk-mjmjk; | |
b=k-mjk; | |
while ~( (mjk==N) | ((x(k)-x(mjk))*a < (x(mjk)-x(mjmjk))*b)) | |
mj(k)=mjmjk; | |
mjk=mj(k); | |
mjmjk=mj(mjk); | |
a=mjk-mjmjk; | |
b=k-mjk; | |
end; % here is the end of the while loop | |
end; % end for jk=1:na | |
itarate_flag = 1; | |
% start the cycling of great RECYCLE | |
while itarate_flag | |
% collect the change points for the GCM from HIGH to LOW | |
% CODE BREAK POINT 40 | |
ic=1; | |
gcm(1)=high; | |
igcm1=gcm(ic); | |
ic=ic+1; | |
gcm(ic)=mn(igcm1); | |
while(gcm(ic) > low) | |
igcm1=gcm(ic); | |
ic=ic+1; | |
gcm(ic)=mn(igcm1); | |
end; | |
icx=ic; | |
% collect the change points for the LCM from LOW to HIGH | |
ic=1; | |
lcm(1)=low; | |
lcm1=lcm(ic); | |
ic=ic+1; | |
lcm(ic)=mj(lcm1); | |
while(lcm(ic) < high) | |
lcm1=lcm(ic); | |
ic=ic+1; | |
lcm(ic)=mj(lcm1); | |
end; | |
icv=ic; | |
% ICX, IX, IG are counters for the convex minorant | |
% ICV, IV, IH are counters for the concave majorant | |
ig=icx; | |
ih=icv; | |
% find the largest distance greater than 'DIP' between the GCM and the LCM from low to high | |
ix=icx-1; | |
iv=2; | |
d=0.0; | |
% Either GOTO CODE BREAK POINT 65 OR ELSE GOTO CODE BREAK POINT 50; | |
if ~(icx~=2 | icv~=2) | |
d=1.0/fn; | |
else | |
iterate_BP50=1; | |
while iterate_BP50 | |
% CODE BREAK POINT 50 | |
igcmx=gcm(ix); | |
lcmiv=lcm(iv); | |
if ~(igcmx > lcmiv) | |
% if the next point of either the GCM or LCM is from the LCM then calculate distance here | |
% OTHERWISE, GOTO BREAK POINT 55 | |
lcmiv1=lcm(iv-1); | |
a=lcmiv-lcmiv1; | |
b=igcmx-lcmiv1-1; | |
dx=(x(igcmx)-x(lcmiv1))*a/(fn*(x(lcmiv)-x(lcmiv1)))-b/fn; | |
ix=ix-1; | |
if(dx < d) | |
goto60 = 1; | |
else | |
d=dx; | |
ig=ix+1; | |
ih=iv; | |
goto60 = 1; | |
end; | |
else | |
% if the next point of either the GCM or LCM is from the GCM then calculate distance here | |
% CODE BREAK POINT 55 | |
lcmiv=lcm(iv); | |
igcm=gcm(ix); | |
igcm1=gcm(ix+1); | |
a=lcmiv-igcm1+1; | |
b=igcm-igcm1; | |
dx=a/fn-((x(lcmiv)-x(igcm1))*b)/(fn*(x(igcm)-x(igcm1))); | |
iv=iv+1; | |
if ~(dx < d) | |
d=dx; | |
ig=ix+1; | |
ih=iv-1; | |
end; | |
goto60 = 1; | |
end; | |
if goto60 | |
% CODE BREAK POINT 60 | |
if (ix < 1) ix=1; end; | |
if (iv > icv) iv=icv; end; | |
iterate_BP50 = (gcm(ix) ~= lcm(iv)); | |
end; | |
end; % End of WHILE iterate_BP50 | |
end; % End of ELSE (IF ~(icx~=2 | icv~=2)) i.e., either GOTO CODE BREAK POINT 65 OR ELSE GOTO CODE BREAK POINT 50 | |
% CODE BREAK POINT 65 | |
itarate_flag = ~(d < dip); | |
if itarate_flag | |
% if itarate_flag is true, then continue calculations and the great iteration cycle | |
% if itarate_flag is NOT true, then stop calculations here, and break out of great iteration cycle to BREAK POINT 100 | |
% calculate the DIPs for the corrent LOW and HIGH | |
% the DIP for the convex minorant | |
dl=0.0; | |
% if not true, go to CODE BREAK POINT 80 | |
if (ig ~= icx) | |
icxa=icx-1; | |
for j=ig:icxa | |
temp=1.0/fn; | |
jb=gcm(j+1); | |
je=gcm(j); | |
% if not true either, go to CODE BREAK POINT 74 | |
if ~(je-jb <= 1) | |
if~(x(je)==x(jb)) | |
a=(je-jb); | |
const=a/(fn*(x(je)-x(jb))); | |
for jr=jb:je | |
b=jr-jb+1; | |
t=b/fn-(x(jr)-x(jb))*const; | |
if (t>temp) temp=t; end; | |
end; | |
end; | |
end; | |
% CODE BREAK POINT 74 | |
if (dl < temp) dl=temp; end; | |
end; | |
end; | |
% the DIP for the concave majorant | |
% CODE BREAK POINT 80 | |
du=0.0; | |
% if not true, go to CODE BREAK POINT 90 | |
if ~(ih==icv) | |
icva=icv-1; | |
for k=ih:icva | |
temp=1.0/fn; | |
kb=lcm(k); | |
ke=lcm(k+1); | |
% if not true either, go to CODE BREAK POINT 86 | |
if ~(ke-kb <= 1) | |
if ~(x(ke)==x(kb)) | |
a=ke-kb; | |
const=a/(fn*(x(ke)-x(kb))); | |
for kr=kb:ke | |
b=kr-kb-1; | |
t=(x(kr)-x(kb))*const-b/fn; | |
if (t>temp) temp=t; end; | |
end; | |
end; | |
end; | |
% CODE BREAK POINT 86 | |
if (du < temp) du=temp; end; | |
end; | |
end; | |
% determine the current maximum | |
% CODE BREAK POINT 90 | |
dipnew=dl; | |
if (du > dl) dipnew=du; end; | |
if (dip < dipnew) dip=dipnew; end; | |
low=gcm(ig); | |
high=lcm(ih); | |
end; % end of IF(itarate_flag) CODE from BREAK POINT 65 | |
% return to CODE BREAK POINT 40 or break out of great RECYCLE; | |
end; % end of WHILE of great RECYCLE | |
% CODE BREAK POINT 100 | |
dip=0.5*dip; | |
xl=x(low); | |
xu=x(high); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment