Last active
August 1, 2022 14:35
-
-
Save RutledgePaulV/6377147407d08f9ae481572548c8c649 to your computer and use it in GitHub Desktop.
bktree.clj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn create-index [distance-fn] | |
(with-meta {} {:distance-fn distance-fn})) | |
(defn index->root [index] | |
(-> index meta :root)) | |
(defn index->distance-fn [index] | |
(-> index meta :distance-fn)) | |
(defn index-many [index objects] | |
(letfn [(index-one | |
([index x] | |
(if-some [root (index->root index)] | |
(index-one root index x) | |
(vary-meta index assoc :root x))) | |
([root index x] | |
(let [distance ((index->distance-fn index) root x) | |
child (get-in index [root distance])] | |
(if (some? child) | |
(recur child index x) | |
(update index root | |
(fn [value] | |
(if (some? value) | |
(assoc value distance x) | |
(sorted-map distance x))))))))] | |
(reduce index-one index objects))) | |
(defn query [index tolerance query] | |
(letfn [(search [root] | |
(let [distance ((index->distance-fn index) root query) | |
min (- distance tolerance) | |
max (+ distance tolerance) | |
children (or (get index root) (sorted-map)) | |
frontier (map val (subseq children >= min <= max))] | |
(if (empty? frontier) | |
(if (<= distance tolerance) | |
(list {:node root :distance distance}) | |
()) | |
(if (<= distance tolerance) | |
(cons {:node root :distance distance} (mapcat search frontier)) | |
(mapcat search frontier)))))] | |
(reduce | |
(fn [m x] | |
(update m (:distance x) (fnil conj #{}) (:node x))) | |
(sorted-map) | |
(search (index->root index))))) | |
(comment | |
(def index (create-index clj-fuzzy.levenshtein/distance)) | |
(def index (index-many index ["book" "rook" "nooks" "boon"])) | |
index | |
; => {"book" {1 "rook", 2 "nooks"}, "rook" {2 "boon"}} | |
(query index 1 "soon") | |
; => {1 #{"boon"}} | |
(query index 2 "soon") | |
; => {1 #{"boon"}, 2 #{"book" "rook"}} | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
http://blog.notdot.net/2007/4/Damn-Cool-Algorithms-Part-1-BK-Trees