Last active
January 1, 2023 15:29
-
-
Save josef-pkt/7035711 to your computer and use it in GitHub Desktop.
Cuzick's non-parametric test of trend
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''Cuzick's non-parametric test of trend | |
Author : Josef Perktold | |
''' | |
import numpy as np | |
from scipy import stats | |
from scipy.stats.stats import rankdata, tiecorrect, square_of_sums | |
class Bunch(object):
    """Generic result container; result attributes are attached dynamically."""
    pass


# written based on scipy.stats.kruskal
def trend_cuzick(data_list, levels=None):
    """
    Compute Cuzick's nonparametric test for trend across ordered groups.

    Parameters
    ----------
    data_list : list of array_like
        Two or more arrays with the sample measurements, one per group.
    levels : None or array_like
        Numeric scores associated with the groups. If None, the
        integers 1, 2, 3, ... are used.

    Returns
    -------
    res : Bunch
        Instance with attributes ``p_value`` (two-sided, normal
        approximation), ``statistic`` (standardized statistic),
        ``value`` (weighted rank sum, ``T`` in the Stata manual),
        ``ranksum``, ``tiecorrection``, ``nobs_groups``, ``n_groups``
        and ``nobs``.

    Notes
    -----
    The p-value is based on the approximation by the normal distribution.
    To be a good approximation the number of samples in each group should
    not be too small.
    """
    args = list(map(np.asarray, data_list))   # one numpy array per group
    na = len(args)
    if na < 2:
        raise ValueError("Need at least two groups in trend_cuzick")
    n = np.asarray(list(map(len, args)))      # group sizes
    if levels is None:
        levels = np.arange(1, na + 1)
    else:
        levels = np.asarray(levels)

    alldata = np.concatenate(args)
    # use the scipy.stats-level names; the scipy.stats.stats submodule
    # these were originally imported from has been removed
    ranked = stats.rankdata(alldata)   # rank the pooled data
    T = stats.tiecorrect(ranked)       # correct for ties
    if T == 0:
        raise ValueError('All numbers are identical in trend_cuzick')

    L = (levels * n).sum()
    # interval boundaries of each group inside the pooled array
    idx = np.insert(np.cumsum(n), 0, 0)
    ranksum = [ranked[idx[i]:idx[i + 1]].sum() for i in range(na)]
    # could use reduceat instead
    value = (levels * ranksum).sum()   # T in Stata manual
    nobs = n.sum()
    # tie_correction is np.sqrt(T)
    var = (nobs + 1.) / 12. * (nobs * (levels**2 * n).sum() - L**2) * T
    se = np.sqrt(var)
    value_standardized = (value - 0.5 * (nobs + 1) * L) / se

    res = Bunch()
    res.value = value
    # BUG FIX: take abs(z) so the two-sided p-value stays within (0, 1]
    # for decreasing trends as well (e.g. levels=[3, 2, 1]); without it
    # a negative z gives p > 1.
    res.p_value = stats.norm.sf(np.abs(value_standardized)) * 2
    res.statistic = value_standardized
    res.ranksum = ranksum
    res.tiecorrection = T
    res.nobs_groups = n
    res.n_groups = na
    res.nobs = nobs
    return res
# group exposure
# Example data: column 0 is the group label (1..3), column 1 the exposure.
data = np.array('''\
1 1.4
1 1.4
1 1.4
1 1.6
1 2.3
1 2.3
2 .9
2 1
2 1.1
2 1.1
2 1.2
2 1.2
2 1.5
2 1.9
2 2.2
2 2.6
2 2.6
2 2.6
2 2.8
2 2.8
2 3.2
2 3.5
2 4.3
2 5.1
3 .8
3 1.7
3 1.7
3 1.7
3 3.4
3 7.1
3 8.9
3 13.5'''.split(), float).reshape(-1, 2)

groups = data[:, 0].astype(int)
# split the exposure column into one array per group
xli = [data[data[:, 0] == k, 1] for k in [1, 2, 3]]
res = trend_cuzick(xli)

from pprint import pprint
pprint(vars(res))

# reference results from Stata's nptrend for the same data
r = {}
r['p'] = .1287642613976359
r['z'] = 1.51899298636755
r['T'] = 1142
r['N'] = 32
pprint(r)

# FIX: use the print() function — the original Python 2 print statements
# are syntax errors under Python 3, although the rest of the file is
# written py3-compatibly (list(map(...))).
print('diff p-value', res.p_value - r['p'])
print('diff statistic', res.statistic - r['z'])
print('diff value', res.value - r['T'])

print('\n changing levels')
res2 = trend_cuzick(xli, levels=[1, 2, 5])
pprint(vars(res2))
Author
josef-pkt
commented
Oct 18, 2013
Great! Love it! Although it's been ages since this has been worked on, I'd like to propose a very minor correction. You need to take the abs of the z score for the p-value, as otherwise your p is going to be larger than 1 for the reverse case, e.g. levels = [3, 2, 1]
res.p_value = stats.norm.sf(np.abs(value_standardized))*2
And one more thing... the 'square_of_sums' import is not used, and I believe it is no longer part of the scipy.stats.stats module.
Regards, Majte
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment