Skip to content

Instantly share code, notes, and snippets.

@rahulpilani
Created December 30, 2011 23:13
Show Gist options
  • Save rahulpilani/1541958 to your computer and use it in GitHub Desktop.
Save rahulpilani/1541958 to your computer and use it in GitHub Desktop.
Prefix tree creation and traversal in Clojure
(ns prefix-tree)
;Record representing a node in the prefix tree.
;strings: The set of strings that the present node represents
;edges: Map representing edges emanating from this node.
;The key being the first letter of the label and the value being the label and the actual node being pointed to."
(defrecord Node [strings edges])
(defn shorter-string [a b]
"Return the shorter string of the two being passed in."
(min-key count a b))
(defn match-string [label string]
"Given an edge label and a string to match against, determine if the two match and if so at what index."
(defn mismatch-index [seq]
(first
(filter
(fn [index]
(not
(=
(.charAt label index)
(.charAt string index)))) seq)))
(let [ss (shorter-string label string) matched-seq (range (.length ss)) m-index (mismatch-index matched-seq)]
(if m-index m-index (.length ss))))
(defn substring-or-nil
"Gets the substring from a string using the same semantics as Java. Additionally converts empty strings to nil
to aid in truthyness."
([string start] (substring-or-nil string start (.length string)))
([string start end]
(let [substring (.substring string start end)]
(if-not (empty? substring) substring nil))))
(defn create-substrings [label string]
"Given an edge label and a string, gets the common string between the two and whats left of the label and the string
after removing the common string.
Returns a map containing 3 keys: common, label-substring and string-substring"
(let [match (match-string label string)]
(hash-map :common (substring-or-nil label 0 match) :label-substring (substring-or-nil label match) :string-substring (substring-or-nil string match))))
(defn partial-match? [match-string]
"Returns whether the match-string passed in represents a partial match."
(some match-string [:string-substring :label-substring]))
(defn first-char [string]
"Return the first character of a string."
(.charAt string 0))
(defn partial-label-match? [match-string ]
"Whether the label matched partially or not."
(match-string :label-substring))
(defn edge-entry [label node]
"Create a map entry for an edge given a label and a node.
The key is the first character of the label and the value is the [label edge] tuple"
[(first-char label) [label node]])
(defn add-edge [node label child]
"Add an edge to node with the given label and child."
(if-not (empty? label)
(assoc node :edges (apply assoc (.edges node) (edge-entry label child)))
node))
(defn add-string-ref
"Add a string reference to a node. Usually this is done after adding an edge"
([node string] (add-string-ref node string (.edges node)))
([node string edges]
(Node. (conj (.strings node) string) edges)))
(defn add-child
"Add a child to a node."
([node label string] (add-child node (Node. #{string} {}) label string))
([node new-child label string]
(->
node
(add-string-ref string)
(add-edge label new-child))))
(defn split-edge [node old-child substrings string]
"Split an edge for a node, whose child is old-child.
Substrings: the common, label and string substrings for the new edge,
string: the string that caused the split."
(let [root-label (:common substrings) label-child (:label-substring substrings) string-child (:string-substring substrings)]
(->
old-child
(add-string-ref string {})
(add-edge label-child old-child)
(add-edge string-child (Node. #{string} {}))
((fn [mid-node] (add-edge node root-label mid-node)))
(add-string-ref string))))
(defn print-tree [node]
"Print the tree represented by the given node."
(defn print-tree-internal [node prefix]
(doseq [child (map val (.edges node)) :while (not (empty? (.edges node)))]
(println prefix (first child))
(print-tree-internal (second child) (str "\t" prefix))))
(print-tree-internal node ""))
(defn add-string [root string]
"Add the given string to the tree represented by the root node. This is the main method of this module.
Returns an entirely new tree after adding the string."
(defn add-string-node [node substring]
(if-not (some empty? [string substring])
(let [initial-char (.charAt substring 0)]
(if-let [edge ((.edges node) initial-char)]
(let [label (first edge) child (second edge) substrings (create-substrings label substring)]
(if (substrings :label-substring) ;if any label substring is left, then there is no more to search. create a new node and return.
(split-edge node child substrings string)
(if (substrings :string-substring)
(add-child node (add-string-node child (substrings :string-substring)) label string)
node)))
(add-child node substring string)))))
(add-string-node root string))
(defn walk-tree [word root]
"Search for the the given word in the tree. Returns back a set that are represented by the matching node."
(defn walk-tree-internal [substring node]
(if-let [edge ((.edges node) (.charAt substring 0))]
(let [label (first edge) child (second edge) substrings (create-substrings label substring) label (:label-substring substrings) next-sub (:string-substring substrings)]
(if (and label next-sub)
#{}
(if label
(.strings child)
(if-not next-sub
(.strings child)
(recur (:string-substring substrings) child)))))
#{}))
(walk-tree-internal word root))
@amalloy
Copy link

amalloy commented Jan 2, 2012

(def shorter-string (partial min-key count)) is one tip.

@rahulpilani
Copy link
Author

Very interesting. Didn't know about min-key. Thanks for the tip!

@decoursin
Copy link

Hey, thanks for posting. Is there a reason you went with this type of implementation rather than something more conventional?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment