Skip to content

Instantly share code, notes, and snippets.

@theJenix
Last active December 23, 2015 23:59
Show Gist options
  • Save theJenix/6713887 to your computer and use it in GitHub Desktop.
Save theJenix/6713887 to your computer and use it in GitHub Desktop.
k-means clustering in LISP
; K-means Clustering
(defun diff-sq (x y)
(expt (- x y) 2))
(defun squared-differences (l1 l2)
(mapcar (lambda(x y) (float (expt (- x y) 2))) l1 l2))
(defun sum-squared-differences (l1 l2)
(reduce #'+ (squared-differences l1 l2)))
(defun laverage (args)
(when args
(/ (reduce #'+ args) (length args))))
(defun argx (fun tuples)
(cadr (find (reduce fun tuples :key #'car) tuples :key #'car)))
; Takes in a list of tuples (2 element nested lists) in the following
; format:
; (value, argument)
; and returns the argument from the minimum value
(defun argmin (tuples)
(argx #'min tuples))
(defun argmax (tuples)
(argx #'max tuples))
; Return a list of distance,center tuples
(defun compute-distance (centers x)
(loop for center in centers collect (list (diff-sq center x) center)))
(defun assign-center (centers x)
(argmin (compute-distance centers x)))
; Given a set of centers and a point, assign each point to the one closest center
; Ties are broken by assigning x to the first center encountered
(defun assign-centers (centers xs)
(loop for x in xs collect (list x (assign-center centers x))))
(defun estimate-center (assigned)
(laverage (mapcar #'car assigned)))
(defun estimate-centers (assigned centers)
(loop for c in centers
collect (estimate-center (remove-if (complement (lambda (x) (= (cadr x) c))) assigned))))
(defun rand-range (minval maxval)
(+ minval (random (1+ (- maxval minval)))))
(defun generate-k-random (k minval maxval)
(format t "Generating ~d random values from between ~d and ~d~C" k minval maxval #\linefeed)
(loop for i from 1 upto k collect (rand-range minval maxval)))
(defun k-means-clustering (data k epsilon)
(let ((centers (generate-k-random k (reduce #'min data) (reduce #'max data))))
(loop for i from 1 do
(format t "Iteration ~d" i)
(setf last_centers centers)
(setf assigned (assign-centers centers data))
(setf centers (estimate-centers assigned centers))
(setf sse (sum-squared-differences centers last_centers))
(format t " - Change of ~f (SSD)~C" sse #\linefeed)
while (>= sse epsilon))
(print (mapcar #'round centers))
(identity centers)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment