Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ato/248018 to your computer and use it in GitHub Desktop.
Save ato/248018 to your computer and use it in GitHub Desktop.
;; LibSVM Model parser.
(use 'clojure.contrib.duck-streams)
(use 'clojure.contrib.seq-utils)
(require '[clojure.contrib.str-utils2 :as s])
;; Directory prefix that model-file prepends to a model's file name.
(def *root* "/data/")
;; (set! *print-length* 5) eval in repl
(defn model-file
  "Returns the path of the model file called name, formed by prefixing
  name with *root*."
  [name]
  (str *root* name))
(defn str-to-int
  "Parses the string s as an integer using Integer/parseInt."
  [s]
  (Integer/parseInt s))
(defn str-to-float
  "Parses the string s as a single-precision float using Float/parseFloat."
  [s]
  (Float/parseFloat s))
(defn str-to-double
  "Parses the string s as a double using Double/parseDouble."
  [s]
  (Double/parseDouble s))
;; Turns a header line "key_word value" into the single-entry map
;; {:key-word "value"}.
(defn parse-header-line
  "Splits line at its first space. The text before the space becomes the
  keyword key, with underscores replaced by dashes; the text after it is
  kept unchanged as the string value.

  Throws IllegalArgumentException when line is nil or contains no space."
  [line]
  (let [[matched raw-key val] (when line
                                (re-matches #"^([^ ]*) (.*)$" line))]
    ;; Without a match there is no key to convert; fail with a message that
    ;; names the offending line rather than a NullPointerException from the
    ;; string replacement below.
    (when-not matched
      (throw (IllegalArgumentException.
              (str "Malformed header line (expected \"key value\"): "
                   (pr-str line)))))
    {(keyword (.replace raw-key "_" "-")) val}))
;; Parses one sparse entry "<feature:int>:<value:float>" into the
;; single-entry map {feature value}.
(defn parse-sparse-item
  "Splits item on \":\" and returns a one-entry map whose key is the first
  part parsed with str-to-int and whose value is the second part parsed
  with str-to-float."
  [item]
  (let [parts (s/split item #":")
        index (str-to-int (first parts))
        weight (str-to-float (second parts))]
    {index weight}))
;; An SV line looks like
;;   class1-vs-class2 ... class1-vs-classN <feature>:<value>*
;; i.e. (classes - 1) alpha values followed by sparse items, where feature
;; is an int and value a float.
(defn process-sv-line
  "Splits line on single spaces and returns the two-element list
  (alpha sv): alpha is a lazy seq of the first (dec classes) tokens parsed
  as doubles, and sv is a map merging the remaining tokens, each parsed
  with parse-sparse-item."
  [classes line]
  (let [alpha-count (dec classes)
        tokens (s/split line #" ")
        [alpha-tokens item-tokens] (split-at alpha-count tokens)
        ;; the alpha coefficients are parsed as doubles
        alpha (map str-to-double alpha-tokens)
        sv (into {} (map parse-sparse-item item-tokens))]
    (list alpha sv)))
;; Reads the model file "my.model" (located via model-file) when this form
;; is evaluated and binds the parsed result to *model*, a map with
;;   :header  the header fields, numeric ones coerced, plus :offsets
;;   :data    a lazy seq with one (alpha sv) list per line after "SV"
;; Throws IllegalStateException if the file ends before an "SV" line.
(def *model*
  (let [[header lines]
        ;; Accumulate header fields one line at a time until the "SV"
        ;; marker line; the lines after the marker are the support vectors.
        (loop [header {}
               lines (read-lines (model-file "my.model"))]
          (let [line (first lines)]
            (cond
              ;; Out of lines without seeing the marker: report that
              ;; explicitly instead of letting re-matches fail on nil.
              (nil? line)
              (throw (IllegalStateException.
                      "Model file ended before the SV marker line"))

              (re-matches #"^SV$" line)
              [header (next lines)]

              :else
              (recur (into header (parse-header-line line))
                     (next lines)))))
        ;; Coerce the numeric header fields from their string form.
        header (assoc header
                 :nr-class (str-to-int (:nr-class header))
                 :total-sv (str-to-int (:total-sv header))
                 :rho (map str-to-float (s/split (:rho header) #" "))
                 :label (map str-to-int (s/split (:label header) #" "))
                 :nr-sv (map str-to-int (s/split (:nr-sv header) #" ")))]
    ;; Construct the return value.
    {:header (assoc header
               ;; Running totals of :nr-sv, starting at 0 and leaving out
               ;; the last count -- presumably the position in :data where
               ;; each class's support vectors begin (confirm with users of
               ;; :offsets). Seeding with 0 yields exactly one offset per
               ;; :nr-sv entry even when there is only a single entry.
               :offsets (reductions + 0 (butlast (:nr-sv header))))
     :data (map (partial process-sv-line (:nr-class header)) lines)}))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment