Skip to content

Instantly share code, notes, and snippets.

@angerman
Created December 3, 2009 16:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save angerman/248320 to your computer and use it in GitHub Desktop.
Save angerman/248320 to your computer and use it in GitHub Desktop.
;; LibSVM Model Parser
(use 'clojure.contrib.duck-streams)
(use 'clojure.contrib.seq-utils)
(require '[clojure.contrib.str-utils2 :as s])
(load "coll-util")
;; Directory prefix for model files; model-file prepends it to a bare file
;; name. Note that this is a machine-specific absolute path.
(def *root* "/Users/angerman/Downloads/libsvm-2.9-dense/tools/")
;; (set! *warn-on-reflection* true) eval in repl
(defn model-file
  "Returns the path of the model file `name`, formed by prefixing it
  with *root*."
  [name]
  (str *root* name))
(defn str-to-int
  "Parses the string `s` as a decimal integer via Integer/parseInt.
  Throws NumberFormatException if `s` is not a valid integer."
  [s]
  (Integer/parseInt s))
(defn str-to-float
  "Parses the string `s` as a single-precision float via
  Float/parseFloat. Throws NumberFormatException on malformed input."
  [s]
  (Float/parseFloat s))
(defn str-to-double
  "Parses the string `s` as a double-precision number via
  Double/parseDouble. Throws NumberFormatException on malformed input."
  [s]
  (Double/parseDouble s))
(defn parse-header-line
  "Turns a header line of the form \"key_word value\" into the
  single-entry map {:key-word \"value\"}.

  The line is split at its first space: everything before it becomes the
  keyword (with underscores replaced by dashes), everything after it is
  kept as an unparsed string."
  [line]
  (let [[_ raw-key raw-val] (re-matches #"^([^ ]*) (.*)$" line)]
    {(keyword (s/replace raw-key "_" "-")) raw-val}))
(defn parse-sparse-item
  "Parses one \"<feature>:<value>\" token into the single-entry map
  {feature value}, reading the feature as an int and the value as a
  float."
  [item]
  (let [parts      (s/split item #":")
        feature-id (str-to-int (first parts))
        weight     (str-to-float (second parts))]
    {feature-id weight}))
(defn process-sv-line
  "Parses one support-vector line of the form
    class1-vs-class2 ... class1-vs-classN <feature>:<value>*
  where each feature is an int and each value a float.

  The first (dec classes) space-separated tokens are the alpha
  coefficients (read as doubles); every remaining token is a sparse
  <feature>:<value> item. Returns a two-element list (alphas sv), where
  sv is a map from feature to value."
  [classes line]
  (let [[alpha-tokens item-tokens] (split-at (dec classes)
                                             (s/split line #" "))
        alpha (map str-to-double alpha-tokens)
        sv    (into {} (map parse-sparse-item item-tokens))]
    (list alpha sv)))
(defn rewrite-header
  "Prettifies the raw header map (keyword -> string) by parsing its
  numeric fields:

    :nr-class  int
    :total-sv  int
    :rho       seq of floats
    :label     seq of ints
    :nr-sv     seq of ints
    :offsets   (added) running start offset for each :nr-sv entry,
               i.e. (0, n1, n1+n2, ...)

  All other entries are passed through unchanged. The :nr-class,
  :total-sv, :rho, :label and :nr-sv entries must be present."
  [header]
  (let [nr-sv (map str-to-int (s/split (:nr-sv header) #" "))]
    (assoc header
      :nr-class (str-to-int (:nr-class header))
      :total-sv (str-to-int (:total-sv header))
      :rho      (map str-to-float (s/split (:rho header) #" "))
      :label    (map str-to-int (s/split (:label header) #" "))
      :nr-sv    nr-sv
      ;; Seed the running sum with 0 rather than consing 0 onto an
      ;; unseeded reduction: (reductions + ()) yields (0), which would
      ;; produce two offsets for a single :nr-sv entry.
      :offsets  (reductions + 0 (butlast nr-sv)))))
(defn parse-header
  "Reads the header of the model file `file`.

  Header lines are accumulated with parse-header-line until a line
  consisting solely of \"SV\" is reached. Returns a two-element vector
  [header lines], where header is the map produced by rewrite-header and
  lines is the (lazy) seq of lines following the SV marker, or nil if
  there are none.

  Throws IllegalArgumentException if the input ends before an SV marker
  is found."
  [file]
  (loop [header {}
         lines  (read-lines file)]
    (let [line (first lines)]
      (cond
        ;; Ran out of input: fail with a clear message rather than a
        ;; NullPointerException from matching against nil.
        (nil? line)
        (throw (IllegalArgumentException.
                (str "No SV marker found in model file: " file)))

        (= "SV" line)
        [(rewrite-header header) (next lines)]

        :else
        (recur (into header (parse-header-line line))
               (next lines))))))
(defn drop-header
  "Returns the seq of lines of `file` that follow the first line
  consisting solely of \"SV\", i.e. the file without its header.

  Returns nil when nothing follows the marker or when the file contains
  no SV marker at all."
  [file]
  ;; drop-while stops at the marker itself; next then skips it and
  ;; yields nil (instead of throwing) when the input is exhausted.
  (next (drop-while #(not= "SV" %) (read-lines file))))
(defn extract-features
  "Returns a lazy seq of the feature indices found in `line`: every run
  of digits immediately followed by a colon, parsed as an int, in order
  of appearance."
  [line]
  (for [[_ digits] (re-seq #"(\d+):" line)]
    (str-to-int digits)))
(defn compute-metrics
  "Takes the [header lines] pair returned by parse-header and returns
  header with an added :features entry: the result of folding
  combine-sorted over the feature indices extracted from each line.

  combine-sorted is not defined in this file (presumably it comes from
  the loaded \"coll-util\" -- confirm there for its exact semantics)."
  [[header lines]]
  (let [features (reduce combine-sorted (map extract-features lines))]
    (assoc header :features features)))
; (loop [[ln & lns*] lines
; features nil
; experiments 0]
; (printf "%d, %d\n" experiments (count features))
; (if (not ln)
; (assoc header
; :experiments experiments
; :features features)
; (recur lns*
; (time (combine-sorted features (extract-features ln)))
; (inc experiments)))))
;; Reads the model and binds it to *model*.
;; This runs when the file is loaded: it parses the header of
;; "upps.data.model" (resolved against *root* by model-file) and then
;; adds the :features entry via compute-metrics.
(def *model* (compute-metrics (parse-header (model-file "upps.data.model"))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment