# kwcooper/Wald-Wolfowitz_Runs_Test.py

## Wald-Wolfowitz_Runs_Test.py
# Wald-Wolfowitz Runs Test (Actual)
# *** For educational purposes only,
# use more robust code for actual analysis

import math
import scipy.stats as st # for pvalue

# Example data: a sequence of binary observations.
# (The current script only works for binary ints, i.e. 0/1 values.)
L = [
    1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
    1, 0, 0, 1, 1, 1, 0, 1, 1, 0,
    0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
    1, 1, 1, 0, 0, 0, 1,
]


def getRuns(l):
    """Split a sequence into its runs of consecutive equal values.

    Args:
        l: iterable of observations (the example data are 0/1 ints).

    Returns:
        A tuple ``(count, runs)``. ``runs`` is a list of lists, each one
        holding a maximal streak of equal adjacent values, in input order;
        ``count`` is ``len(runs)``. Empty input yields ``(0, [])``.
    """
    runsList = []
    for item in l:
        # Extend the current run while the value repeats; any change of
        # value (or the very first item) opens a new run.
        if runsList and item == runsList[-1][-1]:
            runsList[-1].append(item)
        else:
            runsList.append([item])

    return len(runsList), runsList


def WW_runs_test(R, n1, n2, n):
    """Return the z statistic of the Wald-Wolfowitz runs test.

    The observed number of runs is compared with its expected value under
    the null hypothesis of a random ordering, scaled by its standard error.

    Args:
        R: observed number of runs.
        n1: number of observations in the first category (the 1's).
        n2: number of observations in the second category (the 0's).
        n: total number of observations (the caller passes ``n1 + n2``).

    Returns:
        ``(R - muR) / seR`` where ``muR = 2*n1*n2/n + 1`` and
        ``seR = sqrt(2*n1*n2*(2*n1*n2 - n) / (n**2 * (n - 1)))``.

    Raises:
        ZeroDivisionError: if the statistic is undefined because the
            variance denominator is zero (``n`` is 0 or 1) or the standard
            error is zero (e.g. every observation is in one category).
            This is the exception type the bare arithmetic raises for
            these inputs; only the message is made explicit here.
        ValueError: raised by ``math.sqrt`` if the variance expression is
            negative.
    """
    twoProd = 2 * n1 * n2
    denom = (n ** 2) * (n - 1)
    if denom == 0:
        raise ZeroDivisionError(
            'runs test is undefined: need at least 2 observations (n=%s)' % (n,)
        )

    # standard error of R if the null (random) is true
    seR = math.sqrt((twoProd * (twoProd - n)) / denom)
    if seR == 0:
        raise ZeroDivisionError(
            'runs test is undefined: standard error of R is zero '
            '(n1=%s, n2=%s, n=%s)' % (n1, n2, n)
        )

    # expected value of R if the null is true
    muR = (twoProd / n) + 1

    # test statistic: R vs muR
    z = (R - muR) / seR

    return z


# Split the sample into its streaks of identical values
numRuns, listOfRuns = getRuns(L)

# Inputs to the test
n1 = sum(L)       # number of 1's (summing works because the data are 0/1)
n2 = len(L) - n1  # number of 0's
n = n1 + n2       # total sample size; should equal len(L)
R = numRuns       # observed number of runs

# z statistic for the observed number of runs
ww_z = WW_runs_test(R, n1, n2, n)

# p-values from the standard normal survival function of |z|;
# the two-sided value is twice the one-sided value
p_values_one = st.norm.sf(abs(ww_z))
p_values_two = p_values_one * 2

# Report the results, one line per item
print('\n'.join([
    'Wald-Wolfowitz Runs Test',
    'Number of runs: %s' % (R,),
    "Number of 1's: %s; Number of 0's: %s " % (n1, n2),
    'Z value: %s' % (ww_z,),
    'One tailed P value: %s; Two tailed P value: %s ' % (p_values_one, p_values_two),
]))
	# Wald-Wolfowitz Runs Test (Actual)
	# *** For educational purposes only,
	# use more robust code for actual analysis

	import math
	import scipy.stats as st # for pvalue

	# Example data: a sequence of binary observations.
	# (The current script only works for binary ints, i.e. 0/1 values.)
	L = [
		1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
		1, 0, 0, 1, 1, 1, 0, 1, 1, 0,
		0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
		1, 1, 1, 0, 0, 0, 1,
	]


	def getRuns(l):
		"""Split a sequence into its runs of consecutive equal values.

		Args:
			l: iterable of observations (the example data are 0/1 ints).

		Returns:
			A tuple ``(count, runs)``. ``runs`` is a list of lists, each one
			holding a maximal streak of equal adjacent values, in input
			order; ``count`` is ``len(runs)``. Empty input yields ``(0, [])``.
		"""
		runsList = []
		for item in l:
			# Extend the current run while the value repeats; any change of
			# value (or the very first item) opens a new run.
			if runsList and item == runsList[-1][-1]:
				runsList[-1].append(item)
			else:
				runsList.append([item])

		return len(runsList), runsList


	def WW_runs_test(R, n1, n2, n):
		"""Return the z statistic of the Wald-Wolfowitz runs test.

		The observed number of runs is compared with its expected value under
		the null hypothesis of a random ordering, scaled by its standard error.

		Args:
			R: observed number of runs.
			n1: number of observations in the first category (the 1's).
			n2: number of observations in the second category (the 0's).
			n: total number of observations (the caller passes ``n1 + n2``).

		Returns:
			``(R - muR) / seR`` where ``muR = 2*n1*n2/n + 1`` and
			``seR = sqrt(2*n1*n2*(2*n1*n2 - n) / (n**2 * (n - 1)))``.

		Raises:
			ZeroDivisionError: if the statistic is undefined because the
				variance denominator is zero (``n`` is 0 or 1) or the
				standard error is zero (e.g. every observation is in one
				category).
			ValueError: raised by ``math.sqrt`` if the variance expression
				is negative.
		"""
		twoProd = 2 * n1 * n2
		denom = (n ** 2) * (n - 1)
		if denom == 0:
			raise ZeroDivisionError(
				'runs test is undefined: need at least 2 observations (n=%s)' % (n,)
			)

		# standard error of R if the null (random) is true
		seR = math.sqrt((twoProd * (twoProd - n)) / denom)
		if seR == 0:
			raise ZeroDivisionError(
				'runs test is undefined: standard error of R is zero '
				'(n1=%s, n2=%s, n=%s)' % (n1, n2, n)
			)

		# expected value of R if the null is true
		muR = (twoProd / n) + 1

		# test statistic: R vs muR
		z = (R - muR) / seR

		return z


	# Split the sample into its streaks of identical values
	numRuns, listOfRuns = getRuns(L)

	# Inputs to the test
	n1 = sum(L)  # number of 1's (summing works because the data are 0/1)
	n2 = len(L) - n1  # number of 0's
	n = n1 + n2  # total sample size; should equal len(L)
	R = numRuns  # observed number of runs

	# z statistic for the observed number of runs
	ww_z = WW_runs_test(R, n1, n2, n)

	# p-values from the standard normal survival function of |z|;
	# the two-sided value is twice the one-sided value
	p_values_one = st.norm.sf(abs(ww_z))
	p_values_two = p_values_one * 2

	# Report the results, one line per item
	print('\n'.join([
		'Wald-Wolfowitz Runs Test',
		'Number of runs: %s' % (R,),
		"Number of 1's: %s; Number of 0's: %s " % (n1, n2),
		'Z value: %s' % (ww_z,),
		'One tailed P value: %s; Two tailed P value: %s ' % (p_values_one, p_values_two),
	]))