Last active
October 12, 2015 19:18
-
-
Save jackrusher/4074823 to your computer and use it in GitHub Desktop.
Autotaxer in Clojure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn load-taxonomy
  "Reads a tab-separated taxonomy file and returns it as a nested-map trie.

  Each data line has the form `path<TAB>ssc`, where `path` is a run of
  tokens joined by underscores. Every token becomes one level of the trie,
  and the `ssc` value is stored under the `:ssc` key of the node the path
  ends on. Lines starting with `#` and blank lines are skipped.

  `file` is anything `slurp` accepts (a path string, a File, ...).

  Example: the two lines `wheel_alignment<TAB>/a` and `wheel<TAB>/w` yield
  {\"wheel\" {:ssc \"/w\", \"alignment\" {:ssc \"/a\"}}} in either order."
  [file]
  (let [entry? (fn [^String line]
                 ;; The type hint avoids a reflective call to .startsWith.
                 (not (or (string/blank? line)
                          (.startsWith line "#"))))
        parse  (fn [line]
                 (let [[path ssc] (string/split line #"\t")]
                   [(string/split path #"_") ssc]))]
    (reduce (fn [trie [tokens ssc]]
              ;; Set only the :ssc key of the target node. Replacing the
              ;; whole node would throw away children that were added by
              ;; longer paths appearing earlier in the file.
              (assoc-in trie (conj tokens :ssc) ssc))
            {}
            (map parse
                 (filter entry? (string/split-lines (slurp file)))))))
(defn match-in-pool
  "Finds the longest taxonomy path that matches a prefix of `pool`.

  `taxonomy` is a trie node: it is called with a token and returns the child
  node for that token, and may carry a code under `:ssc`. `pool` is a
  sequence of tokens. Returns a map with:
    :ssc  - the code of the deepest matching node that carries one, else nil
    :pool - the tokens left after the ones consumed by that match; when no
            code is found at this level, exactly one token is dropped."
  [taxonomy pool]
  (let [remaining (rest pool)
        node      (taxonomy (first pool))]
    (if-not node
      ;; The first token is unknown at this level: skip it.
      {:ssc nil :pool remaining}
      (let [deeper (match-in-pool node remaining)]
        (if (deeper :ssc)
          ;; A longer path matched further down; prefer it.
          deeper
          ;; Otherwise fall back to this node's own code (nil when absent).
          {:ssc (or (node :ssc) nil) :pool remaining})))))
(defn autotax
  "Counts how often each taxonomy code is matched in a token pool.

  Calls `match-in-pool` on `pool`, then again on whatever tokens that call
  left over, and so on until the leftover is empty. Returns a map from each
  matched code to the number of times it was matched; attempts that produced
  no code are not counted."
  [tax pool]
  (loop [remaining pool
         tally     {}]
    (let [{code :ssc leftover :pool} (match-in-pool tax remaining)
          tally (if (nil? code)
                  tally
                  (assoc tally code (inc (get tally code 0))))]
      (if (empty? leftover)
        tally
        (recur leftover tally)))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; test the above

;; Taxonomy trie used by the examples below. It is built when this form is
;; evaluated, from the relative path "data/mega.tsv".
(def megatax (load-taxonomy "data/mega.tsv"))
(defn string-to-pool
  "Turns free text into a token pool: lower-cases `s`, splits it at every
  non-word character, and drops the empty strings that splitting leaves
  behind around adjacent separators. Returns a lazy sequence of tokens."
  [s]
  (->> (string/split (string/lower-case s) #"[^\w]")
       (filter seq)))
;; Example runs. Each heading string is tokenized with string-to-pool and
;; tallied against megatax. The `;; =>` lines are the results recorded by the
;; author; they depend on the contents of data/mega.tsv.
(autotax megatax (string-to-pool "Wheel and Tire Guide → Wheels, Tires, Wheel Alignment → General Information → Recommended Summer Tires"))
;; => {"/suspension/wheels_tires" 5}
(autotax megatax (string-to-pool "Automatic Transmission → Torque Converter → General Information → Torque Converter, Draining"))
;; => {"/drivetrain/transmission/automatic" 3}
(autotax megatax (string-to-pool "Brake System → Hydraulic Components → Description and Operation → Rear Brake Caliper Assembly Overview"))
;; => {"/brakes" 1, "/brakes/disc" 1}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment