Skip to content

Instantly share code, notes, and snippets.

@sile
Created July 9, 2013 15:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sile/5958231 to your computer and use it in GitHub Desktop.
Save sile/5958231 to your computer and use it in GitHub Desktop.
locality sensitive hashingの実装メモ
;; paper: http://www.isi.edu/natural-language/people/ravichan/papers/clustering.pdf
(defun cos-sim (u v &key product length)
(/ (funcall product u v)
(* (funcall length u) (funcall length v))))
(defun h.r (r u)
(if (>= (funcall r u) 0)
1
0))
;; http://grepcode.com/file/repo1.maven.org/maven2/cc.ivory/ivory/0.7.3/ivory/lsh/projection/WriteRandomVectors.java?av=f
(defun generate-unit-random-vector (num-samples)
(let (
(vector (make-array num-samples :initial-element 0))
(normalization-factor 0)
)
(loop FOR i FROM 0 BELOW num-samples
DO
(multiple-value-bind (x y r)
(loop FOR x = (- (* 2.0 (random 1.0)) 1.0)
FOR y = (- (* 2.0 (random 1.0)) 1.0)
FOR r = (+ (* x x) (* y y))
WHILE (or (> r 1) (= r 0))
FINALLY
(return (values x y r)))
(declare (ignore y))
(let ((f (* x (sqrt (- 2.0 (/ (log r) r))))))
(incf normalization-factor (expt f 2))
(setf (aref vector i) f))))
;; normalize vector
(setf normalization-factor (sqrt normalization-factor))
(loop FOR i FROM 0 BELOW num-samples
FOR val = (aref vector i)
FOR newf = (/ val normalization-factor)
DO
(setf (aref vector i) newf))
(coerce vector 'list)))
(defun calc (input d q b thres)
(let* ((samples (length (car input)))
(rs (loop REPEAT d COLLECT (generate-unit-random-vector samples)))
(bits (loop FOR v IN input
COLLECT
(coerce
(loop FOR r IN rs
COLLECT (calc-h-r v r))
'list #|'bit-vector|#))))
(let ((bits-perm
(loop REPEAT q
FOR perm-table = (make-perm-table d)
COLLECT
(loop FOR i FROM 0
FOR bit IN bits
COLLECT (list i (rotate-bit perm-table bit))))))
(let ((sorted-bits-perm
(loop FOR entry IN bits-perm
FOR sorted = (sort entry (lambda (as bs)
(loop FOR a IN as
FOR b IN bs
DO
(cond ((< a b)
(return t))
((> a b)
(return nil)))
FINALLY
(return nil)))
:key #'second)
COLLECT sorted)))
(let ((m (make-hash-table)))
(loop FOR list IN sorted-bits-perm
DO
(mapl (lambda (cdr)
(destructuring-bind ((i bits) . cdr) cdr
(let ((sims (collect-sim bits cdr b thres)))
(setf (gethash i m)
(delete-duplicates (append sims (gethash i m))
:key #'car)))))
list))
m))
)))
(defun collect-sim (as list b thres &optional (acc '()))
(if (or (= b 0)
(null list))
(nreverse acc)
(destructuring-bind ((i bs) . rest) list
(let ((p (pr as bs)))
(if (>= p thres)
(collect-sim as rest (1- b) thres (cons `(,i ,bs ,p) acc))
(collect-sim as rest (1- b) thres acc))))))
(defun hamming-distance (as bs)
(loop WITH mismatch-count = 0
FOR a IN as
FOR b IN bs
DO
(when (/= a b)
(incf mismatch-count))
FINALLY
(return mismatch-count)))
(defun pr (as bs)
(- 1 (/ (hamming-distance as bs) (length as))))
(defun shuffle (ary)
(dotimes (i (length ary) ary)
(rotatef (aref ary i) (aref ary (random (length ary))))))
(defun make-perm-table (size)
(shuffle (coerce (loop FOR i FROM 0 BELOW size COLLECT i) 'vector)))
(defun rotate-bit (perm-table bit-list)
(loop FOR i FROM 0 BELOW (length perm-table);
FOR j = (aref perm-table i)
DO
(rotatef (elt bit-list i) (elt bit-list j)))
bit-list)
(defun calc-h-r (v r)
(let ((p (product v r)))
(if (>= p 0)
1
0)))
(defun product (v r)
(assert (= (length v) (length r)))
(reduce #'+ (mapcar #'* v r)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment