Skip to content

Instantly share code, notes, and snippets.

@asherrecv
Last active October 4, 2023 14:40
Show Gist options
  • Save asherrecv/99d7f995a8676f4a60a82861e729cacf to your computer and use it in GitHub Desktop.
Save asherrecv/99d7f995a8676f4a60a82861e729cacf to your computer and use it in GitHub Desktop.
Clojure autocompletion with bigrams
(require '[clojure.set :as set])
(defn bigrams-with-position
  "Returns a lazy sequence of `[bigram index]` pairs for `word`.

  Each bigram is the string built from two adjacent elements of `word`, and
  `index` is the zero-based offset of the bigram's first element. Inputs
  with fewer than two elements produce an empty sequence."
  [word]
  (map-indexed
    (fn [idx [head tail]] [(str head tail) idx])
    ;; Sliding window of width 2, step 1: every pair of neighbours.
    (partition 2 1 word)))
(defn create-singleton-bigram-index
  "Builds a one-entry index: a nested map `bigram -> position -> #{word}`
  that holds exactly one bigram, one position and one word."
  [bigram position word]
  (assoc-in {} [bigram position] #{word}))
(defn merge-position-index
  "Merges two `position -> set-of-words` maps. When both maps contain the
  same position, the two word sets are unioned."
  [lhs rhs]
  (let [combine-words (fn [left-words right-words]
                        (set/union left-words right-words))]
    (merge-with combine-words lhs rhs)))
(defn merge-bigram-index
  "Merges two bigram indexes (`bigram -> position -> set-of-words`). When
  both indexes contain the same bigram, their position maps are combined
  with `merge-position-index`."
  [lhs rhs]
  (merge-with
    (fn [left-positions right-positions]
      (merge-position-index left-positions right-positions))
    lhs
    rhs))
(defn create-bigram-index-of-word
  "Builds the bigram index (`bigram -> position -> #{word}`) for one word.

  Every bigram of `word` is turned into a singleton index and the singletons
  are folded together with `merge-bigram-index`. A word that has no bigrams
  (fewer than two characters) yields an empty map."
  [word]
  (reduce
    merge-bigram-index
    ;; Explicit seed: without it, `reduce` over an empty collection calls
    ;; `merge-bigram-index` with zero arguments, which throws.
    {}
    (map (fn [[bigram position]]
           (create-singleton-bigram-index bigram position word))
         (bigrams-with-position word))))
(defn create-bigram-index-of-dict
  "Builds one bigram index (`bigram -> position -> set-of-words`) covering
  every word in `dict` by merging the per-word indexes.

  An empty `dict` yields an empty map."
  [dict]
  (reduce
    merge-bigram-index
    ;; Explicit seed: without it, `reduce` over an empty `dict` calls
    ;; `merge-bigram-index` with zero arguments, which throws.
    {}
    (map create-bigram-index-of-word dict)))
(defn query
  "Returns the set of indexed words that contain every bigram of `query-str`
  at the same position the bigram has in `query-str`.

  Always returns a set: `#{}` when nothing matches, and also when
  `query-str` has no bigrams (fewer than two characters)."
  [bigram-index query-str]
  (let [matches (map (fn [[bigram position]]
                       ;; Default to an empty set so a bigram/position that
                       ;; is missing from the index gives `#{}` rather than
                       ;; letting nil flow through `set/intersection`.
                       (get-in bigram-index [bigram position] #{}))
                     (bigrams-with-position query-str))]
    (if (empty? matches)
      #{}
      (reduce set/intersection matches))))
(comment
;; REPL examples for the index-building helpers. Each `;; =>` line records
;; the result of the expression directly above it. Everything sits inside
;; `comment`, so none of it is evaluated when the file is loaded.
;; Sample dictionary; the expressions in this block pass literal arguments
;; and do not reference it.
(def dict ["go" "goroutine" "golang" "gogo" "gorage"])
;; Bigrams paired with the zero-based position of their first character.
(bigrams-with-position "asd")
;; => (["as" 0] ["sd" 1])
;; A repeated bigram is recorded once per position at which it occurs.
(create-bigram-index-of-word "gogog")
;; => {"go" {0 #{"gogog"}, 2 #{"gogog"}},
;; "og" {1 #{"gogog"}, 3 #{"gogog"}}}
;; Words sharing a bigram at the same position end up in the same set.
(create-bigram-index-of-dict ["go" "gol" "gogog"])
;; => {"go" {0 #{"gol" "go" "gogog"}, 2 #{"gogog"}},
;; "ol" {1 #{"gol"}},
;; "og" {1 #{"gogog"}, 3 #{"gogog"}}}
)
(comment
;; REPL examples for `query`. Each `;; =>` line records the result of the
;; expression directly above it. Everything sits inside `comment`, so none
;; of it is evaluated when the file is loaded.
(def dict ["go" "goroutine" "golang" "gogo" "gorage"])
;; Build the index once, then reuse it for every query below.
(def index (create-bigram-index-of-dict dict))
;; "go" at position 0 and "or" at position 1 must both match.
(query index "gor")
;; => #{"gorage" "goroutine"}
(query index "gog")
;; => #{"gogo"}
(query index "gol")
;; => #{"golang"}
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment