timm/a12.lisp

## a12.lisp
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The A12 non-parametric test
; Tim Menzies, (c) 2013, tim@menzies.us
; (c) http://creativecommons.org/licenses/by/3.0/
;
; The Vargha and Delaney's A12 statistics is a non-parametric effect
; size measure. Reference: + A. Vargha and H. D. Delaney. A critique
; and improvement of the CL common language effect size statistics of
; McGraw and Wong. Journal of Educational and Behavioral Statistics,
; 25(2):101-132, 2000
;
; Given a performance measure M seen in m measures of X and n measures
; of Y, the A12 statistics measures the probability that running
; algorithm X yields higher M values than running another algorithm Y.
;
;   A12 = #(X >  Y)/mn + 0.5*#(X=Y)/mn
;
; According to Vargha and Delaney, a small, medium, large difference
; between two populations:
;
;  + Big is A12 over 0.71
;  + Medium is A12 over 0.64
;  + Small is A12 over 0.56
;
; In my view, this seems gratitiously different to...
;
;  + Big is A12 over three-quarters (0.75)
;  + Medium is A12 over two-thirds (0.66)
;  + Small is A12 over half (0.5)
;
; Whatever, the following code parameterizes that magic number
; so you can use the standard values if you want to.
;
; While A12 studies two treatments. LA12 handles multiple treatments.
; Samples from each population are sorted by their mean. Then
; b4= sample[i] and after= sample[i+1] and rank(after) = 1+rank(b4)
; if a12 reports that the two populations are different.
;
; To simplify that process, I offer the following syntax. A population
; is a list of numbers, which may be unsorted, and starts with some
; symbol or string describing the population. A12s expects a list of
; such populations. For examples of that syntax, see these functions:

(defun demo ()
  (_a12sa)
  (_a12sb)
  nil)

(defun _a12sa ()
  (terpri)
  (print
   (a12s '((x1 0.34 0.49 0.51 0.60)
	   (x2 0.6  0.7  0.8  0.90)
	   (x3 0.15 0.25 0.4  0.35)
	   (x4 0.6  0.7  0.8  0.90)
	   (x5 0.1  0.2  0.3  0.40)))))

; If this code works, you should see...
;
; ((RANK 1 RX X2 MEAN 0.75 POP (0.9 0.8 0.7 0.6))
;  (RANK 1 RX X4 MEAN 0.75 POP (0.9 0.8 0.7 0.6))
;  (RANK 2 RX X1 MEAN 0.485 POP (0.6 0.51 0.49 0.34))
;  (RANK 3 RX X3 MEAN 0.2875 POP (0.4 0.35 0.25 0.15))
;  (RANK 3 RX X5 MEAN 0.25 POP (0.4 0.3 0.2 0.1)))
;
; i.e. our treatments are sorted in the order x2, x4, x1, x3, x5
; and the first two and the last two get the same ranks.

(defun _a12sb ()
  (terpri)
  (print
   (a12s '((y1 101 100 99   101  99.5)
	   (y2 101 100 99   101 100.0)
	   (y3 101 100 99.5 101  99.0)
	   (y4 101 100 99   101 100.0)))))

; If this code works, you should see...
;
; ((RANK 1 RX Y2 MEAN 100.2 POP (101 101 100 100.0 99))
;  (RANK 1 RX Y4 MEAN 100.2 POP (101 101 100 100.0 99))
;  (RANK 1 RX Y1 MEAN 100.1 POP (101 101 100 99.5 99))
;  (RANK 1 RX Y3 MEAN 100.1 POP (101 101 100 99.5 99.0)))
;
; i.e. all the above are so similar that they all get the same rank.
;
; Finally, the linear search of LA12S is not optimal. A better way
; would be a iterative partitioning approach that finds the split in
; the treatments that maximizes the expected value of the differences
; between the mean of the splits and mean of the original population.
; This is called the Scott-Knott test and is discussed in detail on
; Nikolaos Mittas, Lefteris Angelis: Ranking and Clustering Software
; Cost Estimation Models through a Multiple Comparisons Algorithm.
; IEEE Trans. Software Eng. 39(4): 537-551 (2013).

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defun largeDifference  (pop1 pop2) (bigger pop1 pop2 0.75))
(defun mediumDifference (pop1 pop2) (bigger pop1 pop2 0.66))
(defun smallDifference  (pop1 pop2) (bigger pop1 pop2 0.50))

(defun bigger (pop1 pop2 &optional (enough 0.66))
  (let ((n (a12 pop1 pop2)))
    (values (> n enough)
	    n)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; data structure stuf

(defstruct rx
  list
  name
  (more 0)
  (same 0)
  (rank 1)
  mean)

(defun rx! (lst &optional (> #'>) name)
  (let ((out    (make-rx :name name))
	(sorted (sort lst >)))
    (setf (rx-list   out) sorted
	  (rx-mean out) (mean sorted))
    out))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Top-level driver
(defun a12s(pops &optional (> #'>) (enough 0.66))
   "Given n populations, sort each by their mean
    then rank  i compared to i+1"
  (labels ((show (rx)
	     `(rank ,(rx-rank rx)
	       rx   ,(rx-name rx)
	       mean ,(rx-mean rx)
	       pop  ,(rx-list rx))))
    (let* ((rxs (sort
		 (mapcar #'(lambda (lst)
			     (rx! (cdr lst) > (car lst))) pops)
		 > :key #'rx-mean))
	   (rank 1)
	   out
	   (b4 (car rxs)))
      (setf (rx-rank b4) rank)
      (push  (show b4) out)
      (dolist (after (cdr rxs) (reverse out))
	(incf rank (if (bigger
			(rx-list b4)
			(rx-list after)
			enough) 1 0))
	(setf (rx-rank after) rank)
	(push  (show after) out)
	(setf b4 after)))))

;;
; the guts of the process

(defun a12 (pop1 pop2 &optional (> #'>))
  (let ((m   (length pop1))
	(n   (length pop2))
	(one (rx! pop1 >))
	(two (rx! pop2 >)))
    (labels ((stop ()    (or (null (rx-list one))
			     (null (rx-list two))))
             (head  (x)   (car    (rx-list x)))
	     (size  (x)   (length (rx-list x)))
	     (next! (x)   (pop    (rx-list x)) x)
	     (more! (x n) (incf   (rx-more x)  n))
	     (same! (x)   (incf   (rx-same x)))
	     (walk (l1 l2)
	       (unless (stop)
		 (cond ((funcall > (head l1) (head l2))
			(more! l1 (size l2))
			(walk (next! l1) l2))
		       ((= (head l1) (head l2))
			(same! l1)
			(same! l2)
			(walk l1 (next! l2)))
		       (t
			(walk l2 l1))))))
      (walk one two)
      (+ (/        (rx-more one) (* m n))
	 (/ (* 0.5 (rx-same one)) (* m n))))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;xs
; standard stuff
(defun mean (lst)
  (float (/ (reduce #'+ lst)
	    (length lst))))
	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
	; The A12 non-parametric test
	; Tim Menzies, (c) 2013, tim@menzies.us
	; (c) http://creativecommons.org/licenses/by/3.0/
	;
	; The Vargha and Delaney's A12 statistics is a non-parametric effect
	; size measure. Reference: + A. Vargha and H. D. Delaney. A critique
	; and improvement of the CL common language effect size statistics of
	; McGraw and Wong. Journal of Educational and Behavioral Statistics,
	; 25(2):101-132, 2000
	;
	; Given a performance measure M seen in m measures of X and n measures
	; of Y, the A12 statistics measures the probability that running
	; algorithm X yields higher M values than running another algorithm Y.
	;
	; A12 = #(X > Y)/mn + 0.5*#(X=Y)/mn
	;
	; According to Vargha and Delaney, a small, medium, large difference
	; between two populations:
	;
	; + Big is A12 over 0.71
	; + Medium is A12 over 0.64
	; + Small is A12 over 0.56
	;
	; In my view, this seems gratitiously different to...
	;
	; + Big is A12 over three-quarters (0.75)
	; + Medium is A12 over two-thirds (0.66)
	; + Small is A12 over half (0.5)
	;
	; Whatever, the following code parameterizes that magic number
	; so you can use the standard values if you want to.
	;
	; While A12 studies two treatments. LA12 handles multiple treatments.
	; Samples from each population are sorted by their mean. Then
	; b4= sample[i] and after= sample[i+1] and rank(after) = 1+rank(b4)
	; if a12 reports that the two populations are different.
	;
	; To simplify that process, I offer the following syntax. A population
	; is a list of numbers, which may be unsorted, and starts with some
	; symbol or string describing the population. A12s expects a list of
	; such populations. For examples of that syntax, see these functions:

	(defun demo ()
	(_a12sa)
	(_a12sb)
	nil)

	(defun _a12sa ()
	(terpri)
	(print
	(a12s '((x1 0.34 0.49 0.51 0.60)
	(x2 0.6 0.7 0.8 0.90)
	(x3 0.15 0.25 0.4 0.35)
	(x4 0.6 0.7 0.8 0.90)
	(x5 0.1 0.2 0.3 0.40)))))

	; If this code works, you should see...
	;
	; ((RANK 1 RX X2 MEAN 0.75 POP (0.9 0.8 0.7 0.6))
	; (RANK 1 RX X4 MEAN 0.75 POP (0.9 0.8 0.7 0.6))
	; (RANK 2 RX X1 MEAN 0.485 POP (0.6 0.51 0.49 0.34))
	; (RANK 3 RX X3 MEAN 0.2875 POP (0.4 0.35 0.25 0.15))
	; (RANK 3 RX X5 MEAN 0.25 POP (0.4 0.3 0.2 0.1)))
	;
	; i.e. our treatments are sorted in the order x2, x4, x1, x3, x5
	; and the first two and the last two get the same ranks.

	(defun _a12sb ()
	(terpri)
	(print
	(a12s '((y1 101 100 99 101 99.5)
	(y2 101 100 99 101 100.0)
	(y3 101 100 99.5 101 99.0)
	(y4 101 100 99 101 100.0)))))

	; If this code works, you should see...
	;
	; ((RANK 1 RX Y2 MEAN 100.2 POP (101 101 100 100.0 99))
	; (RANK 1 RX Y4 MEAN 100.2 POP (101 101 100 100.0 99))
	; (RANK 1 RX Y1 MEAN 100.1 POP (101 101 100 99.5 99))
	; (RANK 1 RX Y3 MEAN 100.1 POP (101 101 100 99.5 99.0)))
	;
	; i.e. all the above are so similar that they all get the same rank.
	;
	; Finally, the linear search of LA12S is not optimal. A better way
	; would be a iterative partitioning approach that finds the split in
	; the treatments that maximizes the expected value of the differences
	; between the mean of the splits and mean of the original population.
	; This is called the Scott-Knott test and is discussed in detail on
	; Nikolaos Mittas, Lefteris Angelis: Ranking and Clustering Software
	; Cost Estimation Models through a Multiple Comparisons Algorithm.
	; IEEE Trans. Software Eng. 39(4): 537-551 (2013).

	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

	(defun largeDifference (pop1 pop2) (bigger pop1 pop2 0.75))
	(defun mediumDifference (pop1 pop2) (bigger pop1 pop2 0.66))
	(defun smallDifference (pop1 pop2) (bigger pop1 pop2 0.50))

	(defun bigger (pop1 pop2 &optional (enough 0.66))
	(let ((n (a12 pop1 pop2)))
	(values (> n enough)
	n)))

	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
	; data structure stuf

	(defstruct rx
	list
	name
	(more 0)
	(same 0)
	(rank 1)
	mean)

	(defun rx! (lst &optional (> #'>) name)
	(let ((out (make-rx :name name))
	(sorted (sort lst >)))
	(setf (rx-list out) sorted
	(rx-mean out) (mean sorted))
	out))

	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
	; Top-level driver
	(defun a12s(pops &optional (> #'>) (enough 0.66))
	"Given n populations, sort each by their mean
	then rank i compared to i+1"
	(labels ((show (rx)
	`(rank ,(rx-rank rx)
	rx ,(rx-name rx)
	mean ,(rx-mean rx)
	pop ,(rx-list rx))))
	(let* ((rxs (sort
	(mapcar #'(lambda (lst)
	(rx! (cdr lst) > (car lst))) pops)
	> :key #'rx-mean))
	(rank 1)
	out
	(b4 (car rxs)))
	(setf (rx-rank b4) rank)
	(push (show b4) out)
	(dolist (after (cdr rxs) (reverse out))
	(incf rank (if (bigger
	(rx-list b4)
	(rx-list after)
	enough) 1 0))
	(setf (rx-rank after) rank)
	(push (show after) out)
	(setf b4 after)))))

	;;
	; the guts of the process

	(defun a12 (pop1 pop2 &optional (> #'>))
	(let ((m (length pop1))
	(n (length pop2))
	(one (rx! pop1 >))
	(two (rx! pop2 >)))
	(labels ((stop () (or (null (rx-list one))
	(null (rx-list two))))
	(head (x) (car (rx-list x)))
	(size (x) (length (rx-list x)))
	(next! (x) (pop (rx-list x)) x)
	(more! (x n) (incf (rx-more x) n))
	(same! (x) (incf (rx-same x)))
	(walk (l1 l2)
	(unless (stop)
	(cond ((funcall > (head l1) (head l2))
	(more! l1 (size l2))
	(walk (next! l1) l2))
	((= (head l1) (head l2))
	(same! l1)
	(same! l2)
	(walk l1 (next! l2)))
	(t
	(walk l2 l1))))))
	(walk one two)
	(+ (/ (rx-more one) (* m n))
	(/ (* 0.5 (rx-same one)) (* m n))))))

	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;xs
	; standard stuff
	(defun mean (lst)
	(float (/ (reduce #'+ lst)
	(length lst))))