timm/sk.py

## sk.py
#!/usr/bin/env python3
# vim: sta:et:sw=2:ts=2:sts=2 :

from copy import deepcopy as kopy
import sys,random

"""
Scott-Knot test + non parametric effect size + significance tests.
Tim Menzies, 2019. Share and enjoy. No warranty. Caveat Emptor.

Accepts data as per the following exmaple (you can ignore the "*n"
stuff, that is just there for the purposes of demos on larger
and larger data)

Ouputs treatments, clustered such that things that have similar
results get the same ranks.

For a demo of this code, just run

    python3 sk.py

"""

#-----------------------------------------------------
# Examples

def skDemo(n=5) :
  #Rx.data is one way to run the code
  return Rx.data( x1 =[ 0.34, 0.49 ,0.51, 0.6]*n,
                  x2  =[0.6  ,0.7 , 0.8 , 0.89]*n,
                  x3  =[0.13 ,0.23, 0.38 , 0.38]*n,
                  x4  =[0.6  ,0.7,  0.8 , 0.9]*n,
                  x5  =[0.1  ,0.2,  0.3 , 0.4]*n)

"""
Another is to make a file

x1  0.34  0.49  0.51  0.6
x2  0.6   0.7   0.8   0.9
x3  0.15  0.25  0.4   0.35
x4  0.6   0.7   0.8   0.9
x5  0.1   0.2   0.3   0.4

Then call

   Rx.fileIn( fileName )

"""

#-----------------------------------------------------
# Config

class o:
  def __init__(i,**d) : i.__dict__.update(**d)

class THE:
  cliffs = o(dull= [0.147, # small
                    0.33,  # medium
                    0.474 # large
                    ][0])
  bs=     o( conf=0.05,
             b=500)
  mine =  o( private="_")
  char =  o( skip="?")
  rx   =  o( show="%4s %10s %s")
  tile =  o( width=50,
             chops=[0.1 ,0.3,0.5,0.7,0.9],
             marks=[" " ,"-","-","-"," "],
             bar="|",
             star="*",
             show=" %5.3f")
#-----------------------------------------------------
def cliffsDeltaSlow(lst1,lst2, dull = THE.cliffs.dull):
  """Returns true if there are more than 'dull' difference.
     Warning: O(N)^2."""
  n= gt = lt = 0.0
  for x in lst1:
    for y in lst2:
      n += 1
      if x > y:  gt += 1
      if x < y:  lt += 1
  return abs(lt - gt)/n <= dull

def cliffsDelta(lst1, lst2,  dull=THE.cliffs.dull):
  "By pre-soring the lists, this cliffsDelta runs in NlogN time"
  def runs(lst):
    for j,two in enumerate(lst):
      if j == 0: one,i = two,0
      if one!=two:
        yield j - i,one
        i = j
      one=two
    yield j - i + 1,two
  #---------------------
  m, n = len(lst1), len(lst2)
  lst2 = sorted(lst2)
  j = more = less = 0
  for repeats,x in runs(sorted(lst1)):
    while j <= (n - 1) and lst2[j] <  x: j += 1
    more += j*repeats
    while j <= (n - 1) and lst2[j] == x: j += 1
    less += (n - j)*repeats
  d= (more - less) / (m*n)
  return abs(d)  <= dull

def bootstrap(y0,z0,conf=THE.bs.conf,b=THE.bs.b):
  """
  two  lists y0,z0 are the same if the same patterns can be seen in all of them, as well
  as in 100s to 1000s  sub-samples from each.
  From p220 to 223 of the Efron text  'introduction to the boostrap'.
  Typically, conf=0.05 and b is 100s to 1000s.
  """
  class Sum():
    def __init__(i,some=[]):
      i.sum = i.n = i.mu = 0 ; i.all=[]
      for one in some: i.put(one)
    def put(i,x):
      i.all.append(x);
      i.sum +=x; i.n += 1; i.mu = float(i.sum)/i.n
    def __add__(i1,i2): return Sum(i1.all + i2.all)
  def testStatistic(y,z):
     tmp1 = tmp2 = 0
     for y1 in y.all: tmp1 += (y1 - y.mu)**2
     for z1 in z.all: tmp2 += (z1 - z.mu)**2
     s1    = float(tmp1)/(y.n - 1)
     s2    = float(tmp2)/(z.n - 1)
     delta = z.mu - y.mu
     if s1+s2:
       delta =  delta/((s1/y.n + s2/z.n)**0.5)
     return delta
  def one(lst): return lst[ int(any(len(lst))) ]
  def any(n)  : return random.uniform(0,n)
  y,z  = Sum(y0), Sum(z0)
  x    = y + z
  baseline = testStatistic(y,z)
  yhat = [y1 - y.mu + x.mu for y1 in y.all]
  zhat = [z1 - z.mu + x.mu for z1 in z.all]
  bigger = 0
  for i in range(b):
    if testStatistic(Sum([one(yhat) for _ in yhat]),
                     Sum([one(zhat) for _ in zhat])) > baseline:
      bigger += 1
  return bigger / b >= conf

#-------------------------------------------------------
# misc functions
def same(x): return x

class Mine:
  "class that, amongst other times, pretty prints objects"
  oid = 0
  def identify(i):
    Mine.oid += 1
    i.oid = Mine.oid
    return i.oid
  def __repr__(i):
    pairs = sorted([(k, v) for k, v in i.__dict__.items()
                    if k[0] != THE.mine.private])
    pre = i.__class__.__name__ + '{'
    def q(z):
     if isinstance(z,str): return "'%s'" % z
     if callable(z): return "fun(%s)" % z.__name__
     return str(z)
    return pre + ", ".join(['%s=%s' % (k, q(v))])

#-------------------------------------------------------
class Rx(Mine):
  "place to manage pairs of (TreatmentName,ListofResults)"
  def __init__(i, rx="",vals=[], key=same):
    i.rx   = rx
    i.vals = sorted([x for x in vals if x != THE.char.skip])
    i.n    = len(i.vals)
    i.med  = i.vals[int(i.n/2)]
    i.mu   = sum(i.vals)/i.n
    i.rank = 1
  def tiles(i,lo=0,hi=1): return  xtile(i.vals,lo,hi)
  def __lt__(i,j):        return i.med < j.med
  def __eq__(i,j):
    return cliffsDelta(i.vals,j.vals) and \
            bootstrap(i.vals,j.vals)
  def __repr__(i):
    return '%4s %10s %s' % (i.rank, i.rx, i.tiles())
  def xpect(i,j,b4):
    "Expected value of difference in emans before and after a split"
    n = i.n + j.n
    return i.n/n * (b4.med- i.med)**2 + j.n/n * (j.med-b4.med)**2

  #-- end instance methods --------------------------

  @staticmethod
  def data(**d):
    "convert dictionary to list of treatments"
    return [Rx(k,v) for k,v in d.items()]

  @staticmethod
  def fileIn(f,lo=0,hi=1):
    d={}
    what=None
    for word in words(f):
       x = thing(word)
       if isinstance(x,str):
          what=x
          d[what] = d.get(what,[])
       else:
          d[what] += [x]
    Rx.show(Rx.sk(Rx.data(**d)),lo=lo,hi=hi)

  @staticmethod
  def sum(rxs):
    "make a new rx from all the rxs' vals"
    all = []
    for rx in rxs:
        for val in rx.vals:
            all += [val]
    return Rx(vals=all)

  @staticmethod
  def show(rxs,lo=0,hi=1):
    "pretty print set of treatments"
    tmp=Rx.sum(rxs)
    lo,hi=tmp.vals[0], tmp.vals[-1]
    for rx in sorted(rxs):
        print(THE.rx.show % (rx.rank, rx.rx,
              rx.tiles(lo=lo,hi=hi)))

  @staticmethod
  def sk(rxs):
    "sort treatments and rank them"
    def divide(lo,hi,b4,rank):
      cut = left=right=None
      best = 0
      for j in range(lo+1,hi):
          left0  = Rx.sum( rxs[lo:j] )
          right0 = Rx.sum( rxs[j:hi] )
          now    = left0.xpect(right0, b4)
          if now > best:
              if left0 != right0:
                  best, cut,left,right = now,j,kopy(left0),kopy(right0)
      if cut:
        rank = divide(lo, cut, left, rank) + 1
        rank = divide(cut ,hi, right,rank)
      else:
        for rx in rxs[lo:hi]:
          rx.rank = rank
      return rank
    #-- sk main
    rxs=sorted(rxs)
    divide(0, len(rxs),Rx.sum(rxs),1)
    return rxs

#-------------------------------------------------------
def pairs(lst):
    "Return all pairs of items i,i+1 from a list."
    last=lst[0]
    for i in lst[1:]:
         yield last,i
         last = i

def words(f):
  with open(f) as fp:
    for line in fp:
       for word in line.split():
          yield word

def xtile(lst,lo,hi,
             width= THE.tile.width,
             chops= THE.tile.chops,
             marks= THE.tile.marks,
             bar=   THE.tile.bar,
             star=  THE.tile.star,
             show=  THE.tile.show):
  """The function _xtile_ takes a list of (possibly)
  unsorted numbers and presents them as a horizontal
  xtile chart (in ascii format). The default is a
  contracted _quintile_ that shows the
  10,30,50,70,90 breaks in the data (but this can be
  changed- see the optional flags of the function).
  """
  def pos(p)   : return ordered[int(len(lst)*p)]
  def place(x) :
    return int(width*float((x - lo))/(hi - lo+0.00001))
  def pretty(lst) :
    return ', '.join([show % x for x in lst])
  ordered = sorted(lst)
  lo      = min(lo,ordered[0])
  hi      = max(hi,ordered[-1])
  what    = [pos(p)   for p in chops]
  where   = [place(n) for n in  what]
  out     = [" "] * width
  for one,two in pairs(where):
    for i in range(one,two):
      out[i] = marks[0]
    marks = marks[1:]
  out[int(width/2)]    = bar
  out[place(pos(0.5))] = star
  return '('+''.join(out) +  ")," +  pretty(what)

def thing(x):
  "Numbers become numbers; every other x is a symbol."
  try: return int(x)
  except ValueError:
    try: return float(x)
    except ValueError:
      return x

#-------------------------------------------------------
def _cliffsDelta():
  "demo function"
  lst1=[1,2,3,4,5,6,7]*100
  n=1
  for _ in range(10):
      lst2=[x*n for x in lst1]
      print("_cliffsDelta",
           cliffsDelta(lst1,lst2),n) # should return False
      n*=1.03

def bsTest(n=1000,mu1=10,sigma1=1,mu2=10.2,sigma2=1):
   def g(mu,sigma) : return random.gauss(mu,sigma)
   x = [g(mu1,sigma1) for i in range(n)]
   y = [g(mu2,sigma2) for i in range(n)]
   return n,mu1,sigma1,mu2,sigma2,\
          'same' if bootstrap(x,y) else 'different'

#-------------------------------------------------------

if __name__ == "__main__":
  random.seed(1)
  Rx.fileIn("sk1.csv")
  print("-"*50)
  Rx.fileIn("sk2.csv",lo=0,hi=100)
  print("-"*50)
  _cliffsDelta()
  print("-"*50)
  print("bootstrap",  bsTest(100, 10, .5, 10, .5) )
  print("bootstrap",  bsTest(100, 10, 1, 20, 1) )
  print("bootstrap",  bsTest(100, 10, 10, 10.5, 10) )
  print("-"*50)
  n=1
  l1= [1,2,3,4,5,6,7,8,9,10]*10
  for _ in range(10):
    l2=[n*x for x in l1]
    print('same' if bootstrap(l1,l2) else 'different',n)
    n*=1.02
  n=1
  print("-"*50)
  for _ in range(4):
    print()
    print(n*5)
    Rx.show(Rx.sk(skDemo(n)))
    n*=5


## sk1.csv
x1  0.34  0.49  0.51  0.6
        x2  0.6   0.7   0.8   0.9
        x3  0.15  0.25  0.4   0.35
        x4  0.6   0.7   0.8   0.9
        x5  0.1   0.2   0.3   0.4

## sk2.csv
apples
1 3 5 2 4 6 5 6 2 2 3 4 5 6 4 5 2 2
4 4 5 5 3 2 3 4 5 3 2 2 4 5 5 4 3 2

oranges
4 5 7  8 9 6 5 4 4 5 6 6 7 8 8 7 6 5
4 4 5 6 7 8 9 8 7 5 4

pears
6 7 8 9 0 9 8 7 6 7 8 9 0 9 8 7 6 7 8

bananas
1 12 13 15 14 13 11 12 13 14 15 14 13 11 12 13 14 15
17 18 10 19 10 19 17 16 15 16 15 17 17 19 10 10 18 17
16 15 15 16 17 17 19 19 17

DEcf.5f
79 86 86 77 65 77 72 73 65 56 60 69 75 70
62 56 69 55 67 99

GA.05p10c2
27 27 26 2 25 26 10 20 31 6 17 25 25 19 25 23 16 5 2 23

GA.01p10c1
37 62 55 56 63 36 34 65 56 52 33 46 41 34 40 50 40 61 35 65
5

## skexample.md

      
    Raw
  

              skexample.md
            
          
    Input
x1  0.34  0.49  0.51  0.6
x2  0.6   0.7   0.8   0.9
x3  0.15  0.25  0.4   0.35
x4  0.6   0.7   0.8   0.9
x5  0.1   0.2   0.3   0.4

Output:
   1         x5 (         -----*----      |                        ), 0.100,  0.200,  0.300,  0.300,  0.400
   2         x3 (            -----*-      |                        ), 0.150,  0.250,  0.350,  0.350,  0.400
   3         x1 (                        -*---                     ), 0.340,  0.490,  0.510,  0.510,  0.600
   4         x2 (                         |        -----*----      ), 0.600,  0.700,  0.800,  0.800,  0.900
   4         x4 (                         |        -----*----      ), 0.600,  0.700,  0.800,  0.800,  0.900

Output of python3 sk.py
     1	   1         x5 (      ------*-----       |                        ), 0.100,  0.200,  0.300,  0.300,  0.400
     2	   2         x3 (         ------*--       |                        ), 0.150,  0.250,  0.350,  0.350,  0.400
     3	   3         x1 (                        -*-----                   ), 0.340,  0.490,  0.510,  0.510,  0.600
     4	   4         x2 (                         |           ------*----- ), 0.600,  0.700,  0.800,  0.800,  0.900
     5	   4         x4 (                         |           ------*----- ), 0.600,  0.700,  0.800,  0.800,  0.900
     6	--------------------------------------------------
     7	   1     apples ( -*                      |                        ), 2.000,  3.000,  4.000,  5.000,  5.000
     8	   2    oranges (  -*                     |                        ), 4.000,  5.000,  6.000,  7.000,  8.000
     9	   3      pears (   *                     |                        ), 0.000,  7.000,  7.000,  8.000,  9.000
    10	   4    bananas (      -*-                |                        ), 10.000,  13.000,  15.000,  17.000,  19.000
    11	   5 GA.05p10c2 (        ---*-            |                        ), 5.000,  17.000,  23.000,  25.000,  27.000
    12	   6 GA.01p10c1 (                  -----*-|-----                   ), 34.000,  37.000,  46.000,  56.000,  63.000
    13	   7    DEcf.5f (                         |      ---*-------       ), 56.000,  65.000,  70.000,  77.000,  86.000
    14	--------------------------------------------------
    15	_cliffsDelta True 1
    16	_cliffsDelta True 1.03
    17	_cliffsDelta True 1.0609
    18	_cliffsDelta True 1.092727
    19	_cliffsDelta True 1.1255088100000001
    20	_cliffsDelta True 1.1592740743
    21	_cliffsDelta False 1.1940522965290001
    22	_cliffsDelta False 1.2298738654248702
    23	_cliffsDelta False 1.2667700813876164
    24	_cliffsDelta False 1.304773183829245
    25	--------------------------------------------------
    26	bootstrap (100, 10, 0.5, 10, 0.5, 'same')
    27	bootstrap (100, 10, 1, 20, 1, 'different')
    28	bootstrap (100, 10, 10, 10.5, 10, 'same')
    29	--------------------------------------------------
    30	same 1
    31	same 1.02
    32	same 1.0404
    33	same 1.061208
    34	same 1.08243216
    35	same 1.1040808032
    36	same 1.126162419264
    37	different 1.14868566764928
    38	different 1.1716593810022657
    39	different 1.195092568622311
    40	--------------------------------------------------
    41	
    42	5
    43	   1         x5 (      ------*-----       |                        ), 0.100,  0.200,  0.300,  0.300,  0.400
    44	   1         x3 (        ---------*       |                        ), 0.130,  0.230,  0.380,  0.380,  0.380
    45	   2         x1 (                        -*-----                   ), 0.340,  0.490,  0.510,  0.510,  0.600
    46	   3         x2 (                         |           ------*----- ), 0.600,  0.700,  0.800,  0.800,  0.890
    47	   3         x4 (                         |           ------*----- ), 0.600,  0.700,  0.800,  0.800,  0.900
    48	
    49	25
    50	   1         x5 (      ------*-----       |                        ), 0.100,  0.200,  0.300,  0.300,  0.400
    51	   1         x3 (        ---------*       |                        ), 0.130,  0.230,  0.380,  0.380,  0.380
    52	   2         x1 (                        -*-----                   ), 0.340,  0.490,  0.510,  0.510,  0.600
    53	   3         x2 (                         |           ------*----- ), 0.600,  0.700,  0.800,  0.800,  0.890
    54	   3         x4 (                         |           ------*----- ), 0.600,  0.700,  0.800,  0.800,  0.900
    55	
    56	125
    57	   1         x5 (      ------*-----       |                        ), 0.100,  0.200,  0.300,  0.300,  0.400
    58	   2         x3 (        ---------*       |                        ), 0.130,  0.230,  0.380,  0.380,  0.380
    59	   3         x1 (                        -*-----                   ), 0.340,  0.490,  0.510,  0.510,  0.600
    60	   4         x2 (                         |           ------*----- ), 0.600,  0.700,  0.800,  0.800,  0.890
    61	   4         x4 (                         |           ------*----- ), 0.600,  0.700,  0.800,  0.800,  0.900
    62	
    63	625
    64	   1         x5 (      ------*-----       |                        ), 0.100,  0.200,  0.300,  0.300,  0.400
    65	   2         x3 (        ---------*       |                        ), 0.130,  0.230,  0.380,  0.380,  0.380
    66	   3         x1 (                        -*-----                   ), 0.340,  0.490,  0.510,  0.510,  0.600
    67	   4         x2 (                         |           ------*----- ), 0.600,  0.700,  0.800,  0.800,  0.890
    68	   4         x4 (                         |           ------*----- ), 0.600,  0.700,  0.800,  0.800,  0.900
	#!/usr/bin/env python3
	# vim: sta:et:sw=2:ts=2:sts=2 :

	from copy import deepcopy as kopy
	import sys,random

	"""
	Scott-Knot test + non parametric effect size + significance tests.
	Tim Menzies, 2019. Share and enjoy. No warranty. Caveat Emptor.

	Accepts data as per the following exmaple (you can ignore the "*n"
	stuff, that is just there for the purposes of demos on larger
	and larger data)

	Ouputs treatments, clustered such that things that have similar
	results get the same ranks.

	For a demo of this code, just run

	python3 sk.py

	"""

	#-----------------------------------------------------
	# Examples

	def skDemo(n=5) :
	#Rx.data is one way to run the code
	return Rx.data( x1 =[ 0.34, 0.49 ,0.51, 0.6]*n,
	x2 =[0.6 ,0.7 , 0.8 , 0.89]*n,
	x3 =[0.13 ,0.23, 0.38 , 0.38]*n,
	x4 =[0.6 ,0.7, 0.8 , 0.9]*n,
	x5 =[0.1 ,0.2, 0.3 , 0.4]*n)

	"""
	Another is to make a file

	x1 0.34 0.49 0.51 0.6
	x2 0.6 0.7 0.8 0.9
	x3 0.15 0.25 0.4 0.35
	x4 0.6 0.7 0.8 0.9
	x5 0.1 0.2 0.3 0.4

	Then call

	Rx.fileIn( fileName )

	"""

	#-----------------------------------------------------
	# Config

	class o:
	def __init__(i,d) : i.__dict__.update(d)

	class THE:
	cliffs = o(dull= [0.147, # small
	0.33, # medium
	0.474 # large
	][0])
	bs= o( conf=0.05,
	b=500)
	mine = o( private="_")
	char = o( skip="?")
	rx = o( show="%4s %10s %s")
	tile = o( width=50,
	chops=[0.1 ,0.3,0.5,0.7,0.9],
	marks=[" " ,"-","-","-"," "],
	bar="\|",
	star="*",
	show=" %5.3f")
	#-----------------------------------------------------
	def cliffsDeltaSlow(lst1,lst2, dull = THE.cliffs.dull):
	"""Returns true if there are more than 'dull' difference.
	Warning: O(N)^2."""
	n= gt = lt = 0.0
	for x in lst1:
	for y in lst2:
	n += 1
	if x > y: gt += 1
	if x < y: lt += 1
	return abs(lt - gt)/n <= dull

	def cliffsDelta(lst1, lst2, dull=THE.cliffs.dull):
	"By pre-soring the lists, this cliffsDelta runs in NlogN time"
	def runs(lst):
	for j,two in enumerate(lst):
	if j == 0: one,i = two,0
	if one!=two:
	yield j - i,one
	i = j
	one=two
	yield j - i + 1,two
	#---------------------
	m, n = len(lst1), len(lst2)
	lst2 = sorted(lst2)
	j = more = less = 0
	for repeats,x in runs(sorted(lst1)):
	while j <= (n - 1) and lst2[j] < x: j += 1
	more += j*repeats
	while j <= (n - 1) and lst2[j] == x: j += 1
	less += (n - j)*repeats
	d= (more - less) / (m*n)
	return abs(d) <= dull

	def bootstrap(y0,z0,conf=THE.bs.conf,b=THE.bs.b):
	"""
	two lists y0,z0 are the same if the same patterns can be seen in all of them, as well
	as in 100s to 1000s sub-samples from each.
	From p220 to 223 of the Efron text 'introduction to the boostrap'.
	Typically, conf=0.05 and b is 100s to 1000s.
	"""
	class Sum():
	def __init__(i,some=[]):
	i.sum = i.n = i.mu = 0 ; i.all=[]
	for one in some: i.put(one)
	def put(i,x):
	i.all.append(x);
	i.sum +=x; i.n += 1; i.mu = float(i.sum)/i.n
	def __add__(i1,i2): return Sum(i1.all + i2.all)
	def testStatistic(y,z):
	tmp1 = tmp2 = 0
	for y1 in y.all: tmp1 += (y1 - y.mu)**2
	for z1 in z.all: tmp2 += (z1 - z.mu)**2
	s1 = float(tmp1)/(y.n - 1)
	s2 = float(tmp2)/(z.n - 1)
	delta = z.mu - y.mu
	if s1+s2:
	delta = delta/((s1/y.n + s2/z.n)**0.5)
	return delta
	def one(lst): return lst[ int(any(len(lst))) ]
	def any(n) : return random.uniform(0,n)
	y,z = Sum(y0), Sum(z0)
	x = y + z
	baseline = testStatistic(y,z)
	yhat = [y1 - y.mu + x.mu for y1 in y.all]
	zhat = [z1 - z.mu + x.mu for z1 in z.all]
	bigger = 0
	for i in range(b):
	if testStatistic(Sum([one(yhat) for _ in yhat]),
	Sum([one(zhat) for _ in zhat])) > baseline:
	bigger += 1
	return bigger / b >= conf

	#-------------------------------------------------------
	# misc functions
	def same(x): return x

	class Mine:
	"class that, amongst other times, pretty prints objects"
	oid = 0
	def identify(i):
	Mine.oid += 1
	i.oid = Mine.oid
	return i.oid
	def __repr__(i):
	pairs = sorted([(k, v) for k, v in i.__dict__.items()
	if k[0] != THE.mine.private])
	pre = i.__class__.__name__ + '{'
	def q(z):
	if isinstance(z,str): return "'%s'" % z
	if callable(z): return "fun(%s)" % z.__name__
	return str(z)
	return pre + ", ".join(['%s=%s' % (k, q(v))])

	#-------------------------------------------------------
	class Rx(Mine):
	"place to manage pairs of (TreatmentName,ListofResults)"
	def __init__(i, rx="",vals=[], key=same):
	i.rx = rx
	i.vals = sorted([x for x in vals if x != THE.char.skip])
	i.n = len(i.vals)
	i.med = i.vals[int(i.n/2)]
	i.mu = sum(i.vals)/i.n
	i.rank = 1
	def tiles(i,lo=0,hi=1): return xtile(i.vals,lo,hi)
	def __lt__(i,j): return i.med < j.med
	def __eq__(i,j):
	return cliffsDelta(i.vals,j.vals) and \
	bootstrap(i.vals,j.vals)
	def __repr__(i):
	return '%4s %10s %s' % (i.rank, i.rx, i.tiles())
	def xpect(i,j,b4):
	"Expected value of difference in emans before and after a split"
	n = i.n + j.n
	return i.n/n * (b4.med- i.med)*2 + j.n/n (j.med-b4.med)**2

	#-- end instance methods --------------------------

	@staticmethod
	def data(**d):
	"convert dictionary to list of treatments"
	return [Rx(k,v) for k,v in d.items()]

	@staticmethod
	def fileIn(f,lo=0,hi=1):
	d={}
	what=None
	for word in words(f):
	x = thing(word)
	if isinstance(x,str):
	what=x
	d[what] = d.get(what,[])
	else:
	d[what] += [x]
	Rx.show(Rx.sk(Rx.data(**d)),lo=lo,hi=hi)

	@staticmethod
	def sum(rxs):
	"make a new rx from all the rxs' vals"
	all = []
	for rx in rxs:
	for val in rx.vals:
	all += [val]
	return Rx(vals=all)

	@staticmethod
	def show(rxs,lo=0,hi=1):
	"pretty print set of treatments"
	tmp=Rx.sum(rxs)
	lo,hi=tmp.vals[0], tmp.vals[-1]
	for rx in sorted(rxs):
	print(THE.rx.show % (rx.rank, rx.rx,
	rx.tiles(lo=lo,hi=hi)))

	@staticmethod
	def sk(rxs):
	"sort treatments and rank them"
	def divide(lo,hi,b4,rank):
	cut = left=right=None
	best = 0
	for j in range(lo+1,hi):
	left0 = Rx.sum( rxs[lo:j] )
	right0 = Rx.sum( rxs[j:hi] )
	now = left0.xpect(right0, b4)
	if now > best:
	if left0 != right0:
	best, cut,left,right = now,j,kopy(left0),kopy(right0)
	if cut:
	rank = divide(lo, cut, left, rank) + 1
	rank = divide(cut ,hi, right,rank)
	else:
	for rx in rxs[lo:hi]:
	rx.rank = rank
	return rank
	#-- sk main
	rxs=sorted(rxs)
	divide(0, len(rxs),Rx.sum(rxs),1)
	return rxs

	#-------------------------------------------------------
	def pairs(lst):
	"Return all pairs of items i,i+1 from a list."
	last=lst[0]
	for i in lst[1:]:
	yield last,i
	last = i

	def words(f):
	with open(f) as fp:
	for line in fp:
	for word in line.split():
	yield word

	def xtile(lst,lo,hi,
	width= THE.tile.width,
	chops= THE.tile.chops,
	marks= THE.tile.marks,
	bar= THE.tile.bar,
	star= THE.tile.star,
	show= THE.tile.show):
	"""The function _xtile_ takes a list of (possibly)
	unsorted numbers and presents them as a horizontal
	xtile chart (in ascii format). The default is a
	contracted _quintile_ that shows the
	10,30,50,70,90 breaks in the data (but this can be
	changed- see the optional flags of the function).
	"""
	def pos(p) : return ordered[int(len(lst)*p)]
	def place(x) :
	return int(width*float((x - lo))/(hi - lo+0.00001))
	def pretty(lst) :
	return ', '.join([show % x for x in lst])
	ordered = sorted(lst)
	lo = min(lo,ordered[0])
	hi = max(hi,ordered[-1])
	what = [pos(p) for p in chops]
	where = [place(n) for n in what]
	out = [" "] * width
	for one,two in pairs(where):
	for i in range(one,two):
	out[i] = marks[0]
	marks = marks[1:]
	out[int(width/2)] = bar
	out[place(pos(0.5))] = star
	return '('+''.join(out) + ")," + pretty(what)

	def thing(x):
	"Numbers become numbers; every other x is a symbol."
	try: return int(x)
	except ValueError:
	try: return float(x)
	except ValueError:
	return x

	#-------------------------------------------------------
	def _cliffsDelta():
	"demo function"
	lst1=[1,2,3,4,5,6,7]*100
	n=1
	for _ in range(10):
	lst2=[x*n for x in lst1]
	print("_cliffsDelta",
	cliffsDelta(lst1,lst2),n) # should return False
	n*=1.03

	def bsTest(n=1000,mu1=10,sigma1=1,mu2=10.2,sigma2=1):
	def g(mu,sigma) : return random.gauss(mu,sigma)
	x = [g(mu1,sigma1) for i in range(n)]
	y = [g(mu2,sigma2) for i in range(n)]
	return n,mu1,sigma1,mu2,sigma2,\
	'same' if bootstrap(x,y) else 'different'

	#-------------------------------------------------------

	if __name__ == "__main__":
	random.seed(1)
	Rx.fileIn("sk1.csv")
	print("-"*50)
	Rx.fileIn("sk2.csv",lo=0,hi=100)
	print("-"*50)
	_cliffsDelta()
	print("-"*50)
	print("bootstrap", bsTest(100, 10, .5, 10, .5) )
	print("bootstrap", bsTest(100, 10, 1, 20, 1) )
	print("bootstrap", bsTest(100, 10, 10, 10.5, 10) )
	print("-"*50)
	n=1
	l1= [1,2,3,4,5,6,7,8,9,10]*10
	for _ in range(10):
	l2=[n*x for x in l1]
	print('same' if bootstrap(l1,l2) else 'different',n)
	n*=1.02
	n=1
	print("-"*50)
	for _ in range(4):
	print()
	print(n*5)
	Rx.show(Rx.sk(skDemo(n)))
	n*=5
	apples
	1 3 5 2 4 6 5 6 2 2 3 4 5 6 4 5 2 2
	4 4 5 5 3 2 3 4 5 3 2 2 4 5 5 4 3 2

	oranges
	4 5 7 8 9 6 5 4 4 5 6 6 7 8 8 7 6 5
	4 4 5 6 7 8 9 8 7 5 4

	pears
	6 7 8 9 0 9 8 7 6 7 8 9 0 9 8 7 6 7 8

	bananas
	1 12 13 15 14 13 11 12 13 14 15 14 13 11 12 13 14 15
	17 18 10 19 10 19 17 16 15 16 15 17 17 19 10 10 18 17
	16 15 15 16 17 17 19 19 17

	DEcf.5f
	79 86 86 77 65 77 72 73 65 56 60 69 75 70
	62 56 69 55 67 99

	GA.05p10c2
	27 27 26 2 25 26 10 20 31 6 17 25 25 19 25 23 16 5 2 23

	GA.01p10c1
	37 62 55 56 63 36 34 65 56 52 33 46 41 34 40 50 40 61 35 65
	5