Created
May 22, 2013 20:07
-
-
Save timm/5630491 to your computer and use it in GitHub Desktop.
Python version of non-parametric hypothesis testing using Vargha and Delaney's A12 statistic.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Rx:
    """Hold the numbers of one treatment together with its name and rank.

    The constructor takes a single sequence whose first item is the
    treatment name and whose remaining items are its measurements.

    Attributes:
      rx   -- the treatment name (first item of the input)
      lst  -- the measurements (everything after the first item)
      mean -- arithmetic mean of the measurements
      rank -- 0 until a ranking is assigned

    Raises ValueError when the treatment has a name but no measurements.
    """

    def __init__(self, lst):
        self.rx, self.lst = lst[0], lst[1:]
        if len(self.lst) == 0:
            # Fail with a message that names the treatment instead of an
            # anonymous ZeroDivisionError from the mean below.
            raise ValueError("treatment %r has no numbers" % (self.rx,))
        # float() forces true division, so all-integer samples are not
        # truncated when run under Python 2.
        self.mean = sum(self.lst) / float(len(self.lst))
        self.rank = 0

    def __repr__(self):
        return 'rank #%s %s at %s' % (self.rank, self.rx, self.mean)
def a12s(lst, rev=True, enough=0.66):
    """Rank treatments by comparing neighbours in mean-sorted order.

    lst    -- iterable of rows; each row is handed to Rx (name first,
              then the numbers)
    rev    -- True sorts by descending mean; the flag is also passed
              through to a12 for the pairwise comparison
    enough -- a12 score that must be exceeded before the next treatment
              is given a new (higher) rank number

    Returns the list of Rx objects, sorted by mean, with .rank filled in
    starting at 1.  An empty input returns an empty list.
    """
    rxs = sorted((Rx(row) for row in lst),
                 key=lambda rx: rx.mean, reverse=rev)
    if not rxs:
        # Nothing to rank; the original indexed rxs[0] and crashed here.
        return []
    rank = 1
    previous = rxs[0]
    previous.rank = rank
    for current in rxs[1:]:
        # Only adjacent treatments are compared: a new rank starts when
        # the previous one beats the current one often enough.
        if a12(previous.lst, current.lst, rev) > enough:
            rank += 1
        current.rank = rank
        previous = current
    return rxs
def a12(lst1, lst2, rev=True):
    """Score how often an item of lst1 beats an item of lst2.

    Every (x, y) pair with x from lst1 and y from lst2 is inspected.
    A pair counts as a win when x > y (rev is true) or x < y (rev is
    false); equal pairs count as half a win.

    Returns (wins + 0.5 * ties) divided by the number of pairs.
    """
    wins = 0.0
    ties = 0.0
    for x in lst1:
        for y in lst2:
            if x == y:
                ties += 1
                continue
            beats = (x > y) if rev else (x < y)
            if beats:
                wins += 1
    pair_count = len(lst1) * len(lst2)
    return (wins + 0.5 * ties) / pair_count
def fromFile(f="a12.dat", rev=True, enough=0.66):
    """Read treatments from a text file and rank them with a12s.

    The file holds whitespace-separated words.  A word that starts with
    an optional sign followed by a digit is a number; any other word is
    a treatment name.  Numbers are added to the most recently named
    treatment, so one treatment's numbers may continue over several
    lines.  Blank lines are skipped.

    f      -- path of the data file
    rev    -- passed through to a12s
    enough -- passed through to a12s

    Returns whatever a12s returns for the collected treatments.

    Raises ValueError if a number appears before any treatment name, or
    if a number-like word cannot be converted by float().
    """
    import re
    # Matched against the whole word.  The original tested only the first
    # character, so signed values such as "-0.5" were taken for names.
    number = re.compile(r'^[+-]?[0-9]')
    space = re.compile(r'[ \t\n]+')
    cache = {}
    now = None  # name of the treatment currently being filled
    with open(f) as src:  # 'with' closes the file even if parsing fails
        for line in src:
            line = line.strip()
            if not line:
                continue
            for word in space.split(line):
                if number.match(word):
                    if now is None:
                        raise ValueError(
                            "number %r appears before any treatment name in %s"
                            % (word, f))
                    cache[now].append(float(word))
                else:
                    now = word
                    # Each row starts with its own name, as Rx expects.
                    # NOTE: a repeated name restarts that treatment's row.
                    cache[now] = [now]
    return a12s(list(cache.values()), rev, enough)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
; The "fromFile" function reads sample data from a file. For example, if the file contains...
x1 0.34 0.49 0.51 0.60
x2 0.9 0.7 0.8 0.60
x3 0.15 0.25 0.4 0.35
x4 0.6 0.7 0.8 0.90
x5 0.1 0.2 0.3 0.40
; then this call will print the stats:
; for rx in fromFile(): print rx