Skip to content

Instantly share code, notes, and snippets.

@timm
Last active March 1, 2023 03:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timm/a1df97d56a5b2c2b4ec607f66e81b6ae to your computer and use it in GitHub Desktop.
Save timm/a1df97d56a5b2c2b4ec607f66e81b6ae to your computer and use it in GitHub Desktop.
!TABU .lisp
Name. What
tabu.lisp main
tabu-lib.lisp background routines
tabu-eg.lisp test suites
; vim: set ts=2 sw=2 sts=2 et :
(defpackage :tabu (:use :cl))
(in-package :tabu)
(load "tabulib")
(defvar *help* "
tabu.lisp
OPTIONS:
-b bins max bin numbers = 16
-f file csv file = ../data/auto93.csv
-g go start up action = nothing
-K k bayesian K = 1
-m max max num cace = 512
-M m bayesian M = 2
-p p dist coeffecient = 2
-s seed random numbe seed = 10013")
(setf *settings* (settings *help*))
;-------------------------------------------------------------------------------
;## Structs
(defstruct data
"stores `rows`, sumamrized sin `cols`"
rows cols)
(defstruct cols
"stores everything in `all`, independent/dependent things in `x`,`y`"
all x y names klass)
(defstruct num
"sumamrizes a stream of numbers"
(at 0) (txt "") (n 0) (w 1) ; w=1,-1 means "maximize", "minimize"
(hi most-negative-fixnum)
(lo most-positive-fixnum)
(mu 0) (m2 0))
(defstruct sym
"sumamrizes a stream of symbols"
(at 0) (txt "") (n 0) (w 1) has (most 0) mode)
;-------------------------------------------------------------------------------
;## Create
(defun lst->cols (lst &aux (self (make-cols :names lst)))
"column names to cols, then added to `all` and (maybe) `x`, `y`, and `klass`"
(with-slots (all x y klass) self
(loop :for at :from 0 :and txt :in lst :do
(let* ((isNum (and (> (length txt) 0) (upper-case-p (char txt 0))))
(what (if isNum #'make-num #'make-sym))
(col (funcall what :at at :txt txt :w (if (got txt -1 #\-) -1 1))))
(push col all)
(unless (got txt -1 #\X)
(if (got txt -1 #\! txt) (setf klass col))
(if (got txt -1 #\! #\+ #\-) (push col y) (push col x))))))
self)
(defun src->data (src &optional rows &aux (self (make-data)))
"from file if (stringp src); from list if (consp src); mimic structure if (data-p src)"
(labels ((row (x) (add self x)))
(cond ((stringp src) (with-file src #'row))
((consp src) (mapc #'row src))
((data-p src) (row (cols-names (data-cols src)))))
(mapc #'row rows)
self))
;-------------------------------------------------------------------------------
;## Add
(defmethod add ((self data) lst)
"updates `rows` and `cols`"
(aif (data-cols self)
(push (add it lst) (data-rows self))
(setf (data-cols self) (lst->cols lst))))
(defmethod add ((self cols) lst)
"update nums and syms"
(dolist (tmp `(,(cols-x self) ,(cols-y self)) lst)
(dolist (col tmp)
(add col (elt lst (slot-value col 'at))))))
(defmethod add ((self sym) x)
"update frequency counts (in `has`) and `most` and `mode`"
(with-slots (has n mode most) self
(unless (eql x #\?)
(incf n)
(incf (freq x has))
(if (> (freq x has) most) (setf most (freq x has) mode x)))))
(defmethod add ((self num) x ) ;;; Add one thing, updating 'lo,hi'
"updates `lo`, `hi`, `mu`, `sd`"
(with-slots (n lo hi mu m2) self
(unless (eq x #\?)
(incf n)
(let ((d (- x mu)))
(incf mu (/ d n))
(incf m2 (* d (- x mu)))
(setf lo (min x lo)
hi (max x hi))))))
;-------------------------------------------------------------------------------
;## Queries
(defmethod mid ((self sym)) (sym-mode self))
(defmethod mid ((self num)) (num-mu self))
(defmethod div ((self sym))
"diversity (entropy)."
(with-slots (has n) self
(labels ((fun (p) (if (<= p 0) 0 (* -1 (* p (log p 2))))))
(loop for (_ . n1) in has sum (fun (/ n1 n))))))
(defmethod div ((self num))
"return standard deviation"
(with-slots (n m2) self (if (<= n 1) 0 (sqrt (/ m2 (- n 1))))))
(defmethod like1 ((self sym) x prior)
(with-slots (n has) self
(/ (+ (freq x has) (* (? m) prior))
(+ n (? m)))))
(defmethod like1 ((self num) x _)
(with-slots (mu n) self
(let ((sd (div self))
(tiny 1E-32))
(cond ((< x (- mu (* 4 sd))) 0)
((> x (+ mu (* 4 sd))) 0)
(t (let ((denom (sqrt (* 2 pi sd sd)))
(nom (exp (/ (* -1 (expt (- x mu) 2))
(+ tiny (* 2 sd sd))))))
(/ nom (+ denom tiny))))))))
'(defmethod like ((self data) row nall nh)
(with-slots (rows cols) self
(let ((prior (/ (1+ (? k) (length rows)) (+ nall (* nh (? k)))))
(+ (log prior)
(loop :for col :in (cols-x cols) :sum
(let ((x (elt row (col-at col))))
(if (eql x #\?)
0
(log (like1 col x prior))))))))))
(defmethod classify ((self data) row hs &aux out (most most-negative-fixnum))
(dolist (h hs (values out most))
(let ((tmp (like h row (data-n self) (1+ (length hs)))))
(if (> tmp most) (setq most tmp
out h)))))
; vi: set ts=2 sw=2 sts=2 et :
(load "tabu")
(defun tests ()
`((rand
,(lambda (&aux (n (make-num)))
(dotimes (i 1000) (add n (expt (rand) 2)))
(assert (<= .35 (mid n) .36))
(assert (<= .30 (div n) .31))))
(num
,(lambda (&aux (n (make-num)))
(dotimes (i 1000) (add n i))
(assert (<= 498 (mid n) 502))))
(sym
,(lambda (&aux (s (make-sym)))
(dolist (x '(a a a a b b c)) (add s x))
(assert (<= 1.37 (div s) 1.38) () "sym")))
(data
,(lambda (&aux (d (src->data (? file))))
(assert (eql 398 (length (data-rows d))))
(assert (eql 4 (length (cols-x (data-cols d)))))))
))
(let ((fails 0)
(b4 (copy-tree (settings *help* (args)))))
(loop :for (key fun) :in (tests) :do
(setf *settings* (copy-tree b4)
*seed* (? seed))
(when (member (? go) (list "all" key) :key #'equalp)
(format t "~%⚠️ ~a " key)
(cond ((funcall fun) (princ " PASSED ✅"))
(t (princ " FAILED ❌")
(incf fails)))))
(bye fails))
; vi: set ts=2 sw=2 sts=2 et :
;## Globals
(defvar *settings* nil)
;## Macros
(defmacro ? (x &optional (lst '*settings*))
"alist accessor, defaults to searching `*settings*`"
`(cdr (assoc ',x ,lst)))
(defmacro aif (test then &optional else)
"used to test on a result that is also needed by `then`"
`(let ((it ,test)) (if it ,then ,else)))
(defmacro freq (x lst &optional (init 0))
"frequency counts for small group of symbols (say, less than 50)"
`(cdr (or (assoc ,x ,lst :test #'equal)
(car (setf ,lst (cons (cons ,x ,init) ,lst))))))
;-------------------------------------------------------------------------------
;## Sys
(defun args ()
"accessing command-line flats"
#+clisp ext:*args*
#+sbcl sb-ext:*posix-argv*)
(defun bye (&optional (x 0))
"quit list"
#+clisp (ext:exit x)
#+sbcl (sb-ext:exit :code x))
;-------------------------------------------------------------------------------
(defvar *seed* 10013)
(defun rand (&optional (n 1))
"random float 0.. < n"
(setf *seed* (mod (* 16807.0d0 *seed*) 2147483647.0d0))
(* n (- 1.0d0 (/ *seed* 2147483647.0d0))))
(defun rint (&optional (n 1) &aux (base 10000000000.0))
"random int 0..n-1"
(floor (* n (/ (rand base) base))))
;-------------------------------------------------------------------------------
;## Lists
(defun per (seq &optional (p .5))
(elt seq (floor (* (min .999999 (max 0 p)) (length seq)))))
;-------------------------------------------------------------------------------
;## Strings
(defun trim (s)
"kill whitespace at start, at end"
(string-trim '(#\Space #\Tab #\Newline) s))
(defun got (s n &rest chars)
"Does `s` hold any of `chars` at position `n` (negative `n` means 'from end of string')"
(let ((n (if (>= n 0) n (+ (length s) n))))
(if (and (stringp s) (>= (1- (length s)) n))
(dolist (c chars)
(if (eql c (char s n))
(return-from got t))))))
(defun split (s &optional (sep #\,) (filter #'thing) (here 0))
"split `s`, divided by `sep` filtered through `filter`"
(let* ((there (position sep s :start here))
(word (funcall filter (subseq s here there))))
(labels ((tail () (if there (split s sep filter (1+ there)))))
(if (equal word "") (tail) (cons word (tail))))))
(defun words (s)
"divide a string on space"
(split s #\Space #'trim))
;-------------------------------------------------------------------------------
;## Strings to Things
(defun thing (s &aux (s1 (trim s)))
"coerce `s` into a number or string or t or nil or #\?"
(cond ((equal s1 "?") #\?)
((equal s1 "t") t)
((equal s1 "nil") nil)
(t (let ((n (read-from-string s1 nil nil)))
(if (numberp n) n s1)))))
(defun with-file (file fun &optional (filter #'split))
"call `fun` for each line in `file`"
(with-open-file (s file)
(loop (funcall fun (funcall filter (or (read-line s nil) (return)))))))
;-------------------------------------------------------------------------------
;## Settings
(defun settings (s &optional args)
"for lines like ' -Key Flag ..... Default', return `(KEY . DEFAULT)`"
(loop
:for (flag key . lst)
:in (split s #\NewLine #'words)
:if (got flag 0 #\-)
:collect (cons (intern (string-upcase key))
(cli args flag (thing (car (last lst)))))))
(defun cli (lst flag b4)
"if `flag` in `lst`, then update `b4` from `lst`"
(aif (member flag lst :test 'equal)
(cond ((eql b4 t) nil)
((eql b4 nil) t)
(t (thing (second it))))
b4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment