Wald-Wolfowitz Runs Test demonstration in python
# Wald-Wolfowitz Runs Test (Actual)
# *** For educational purposes only;
# use more robust code for actual analysis. ***
import itertools
import math

import scipy.stats as st  # for pvalue
# Example data (Current script only works for binary ints) | |
# Example observations: a sequence of binary ints (1 and 0 are the two categories).
L = [
    1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1,
    0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,
]
# Finds runs in data: counts and creates a list of them | |
# TODO: There has to be a more pythonic way to do this... | |
def getRuns(l):
    """Split a sequence into its runs of consecutive equal values.

    Args:
        l: Iterable of observations (the example data uses binary ints).

    Returns:
        A ``(count, runs)`` tuple. ``runs`` is a list of lists, one per
        maximal streak of equal adjacent values, in input order, and
        ``count`` is ``len(runs)``. Empty input yields ``(0, [])`` rather
        than a single phantom empty run.
    """
    # With no key function, groupby groups *adjacent* equal items, and each
    # such group is exactly one run.
    runs = [list(streak) for _, streak in itertools.groupby(l)]
    return len(runs), runs
# Define the Wald-Wolfowitz runs test statistic (normal approximation)
def WW_runs_test(R, n1, n2, n):
    """Return the z statistic of the Wald-Wolfowitz runs test.

    The observed number of runs is compared with its expected value under
    the null hypothesis (the sequence is random) and scaled by the
    standard error of the run count.

    Args:
        R: Observed number of runs.
        n1: Number of observations in the first category (e.g. 1's).
        n2: Number of observations in the second category (e.g. 0's).
        n: Total number of observations; expected to equal ``n1 + n2``.

    Returns:
        The test statistic ``(R - muR) / seR``.

    Raises:
        ZeroDivisionError: If the standard error is zero or undefined,
            which happens when either category is empty, when ``n < 2``,
            or when ``n1 == n2 == 1``.
    """
    pair_term = 2 * n1 * n2
    variance_numerator = pair_term * (pair_term - n)
    variance_denominator = (n ** 2) * (n - 1)

    # Fail with an explanatory message instead of an opaque
    # "division by zero" raised from the middle of the arithmetic.
    if variance_denominator == 0 or variance_numerator == 0:
        raise ZeroDivisionError(
            "runs test is undefined: standard error of R is zero "
            "(n1=%s, n2=%s, n=%s)" % (n1, n2, n)
        )

    # Standard error of R if the null (random) is true.
    seR = math.sqrt(variance_numerator / variance_denominator)
    # Expected value of R if the null is true.
    muR = pair_term / n + 1
    # Test statistic: R vs muR.
    return (R - muR) / seR
# Gather info | |
# Grab the streaks in the data: how many there are, and the streaks themselves.
numRuns, listOfRuns = getRuns(L)

# Define parameters
R = numRuns        # number of runs
n1 = sum(L)        # number of 1's (valid because the data are binary ints)
n2 = len(L) - n1   # number of 0's
n = n1 + n2        # should equal len(L)

# Run the test
ww_z = WW_runs_test(R, n1, n2, n)

# Convert the z statistic to p-values via the standard normal survival function.
p_values_one = st.norm.sf(abs(ww_z))  # one-sided
p_values_two = p_values_one * 2       # two-sided: double the one-sided tail

# Print results
print('Wald-Wolfowitz Runs Test')
print('Number of runs: %s' % (R))
print('Number of 1\'s: %s; Number of 0\'s: %s ' % (n1, n2))
print('Z value: %s' % (ww_z))
print('One tailed P value: %s; Two tailed P value: %s ' % (p_values_one, p_values_two))
This comment has been minimized.
This comment has been minimized.
Ha! I love it. Good eye @shadiakiki1986. Got to love itertools. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
The more pythonic way for `getRuns` is: [code snippet missing from this capture]
and then drop the `listOfRuns` variable since you don't use it anyway.
Ref: https://aaronschlegel.me/wald-wolfowitz-two-sample-runs-test.html