Skip to content

Instantly share code, notes, and snippets.

@si14

si14/viterbi.clj Secret

Last active December 15, 2015 15:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save si14/df5b9afe28835f8cc154 to your computer and use it in GitHub Desktop.
Save si14/df5b9afe28835f8cc154 to your computer and use it in GitHub Desktop.
(defn argmax-max [f xs]
(letfn [(folder-monoid
([] [Double/NEGATIVE_INFINITY Double/NEGATIVE_INFINITY])
([[_ y1 :as a] [_ y2 :as b]] (if (> y1 y2) a b)))]
(r/fold folder-monoid (r/map (fn [x] [x (f x)]) xs))))
(defn unzip2-with [f xs]
(letfn [(combine-monoid
([] [[] []])
([a b] (map concat a b)))
(reduce-fn [[as bs] x]
(let [[a b] (f x)]
[(conj as a) (conj bs b)]))]
(r/fold combine-monoid reduce-fn xs)))
(defn viterbi-folder [{:keys [qs es pis-prev bps] :as ctx} [x ks-2 ks-1 ks]]
(letfn [(pi [u v w] (* (pis-prev [w u]) (qs [v w u]) (es [x v])))
(pi-finder [[u v :as uv]] [uv (argmax-max (partial pi u v) ks-2)])]
(let [uv-pairs (for [u ks-1, v ks] [u v])
[pis new-bps] (->> (r/map pi-finder uv-pairs)
(into [])
(unzip2-with (fn [[uv [w pi]]]
[[uv pi] [uv w]])))]
(assoc ctx
:pis-prev (into {} pis)
:bps (conj bps (into {} new-bps))))))
(defn viterbi [qs es ks xs]
(let [initial-ctx {:qs qs, :es es, :pis-prev {["*" "*"] 1.0}, :bps []}
ks-adder (fn [idx x] (case idx
0 [x #{"*"} #{"*"} ks]
1 [x #{"*"} ks ks]
[x ks ks ks]))
xs-and-ks (into [] (map-indexed ks-adder xs))
{:keys [bps pis-prev]} (reduce viterbi-folder initial-ctx xs-and-ks)
[[_ y-last :as ys-tail] _] (argmax-max (fn [[u v]] (* (pis-prev [u v])
(qs ["STOP" u v])))
(for [u ks v ks] [u v]))
backtrack (->> (reductions (fn [[y1 y2 :as t] bt] [(bt t) y1])
ys-tail (rseq bps))
(map first)
(cons y-last)
(reverse)
(drop 2)
(into []))]
backtrack))
;;; TESTS
;; Utils
(t/deftest argmax-max-test
(t/is (= [3 9] (argmax-max #(* % %) [1 2 3 2 1]))))
(t/deftest unzip2-with-test
(t/is (= [[1 3 5] [2 4 6]]
(unzip2-with identity [[1 2] [3 4] [5 6]])))
(t/is (= [[[:a 1] [:a 3]] [[:b 2] [:b 4]]]
(unzip2-with (fn [[a b]] [[:a a] [:b b]]) [[1 2] [3 4]]))))
;; Define 2nd order HMM
(def sunny-world
{:qs {["S" "*" "*" ] 0.5
["NS" "*" "*" ] 0.5
["S" "*" "S" ] 0.7
["NS" "*" "S" ] 0.3
["S" "*" "NS"] 0.3
["NS" "*" "NS"] 0.7
["S" "S" "S" ] 0.8
["NS" "S" "S" ] 0.1
["STOP" "S" "S" ] 0.1
["S" "NS" "S" ] 0.3
["NS" "NS" "S" ] 0.6
["STOP" "NS" "S" ] 0.1
["S" "S" "NS"] 0.6
["NS" "S" "NS"] 0.3
["STOP" "S" "NS"] 0.1
["S" "NS" "NS"] 0.1
["NS" "NS" "NS"] 0.8
["STOP" "NS" "NS"] 0.1
}
:es {["U" "S" ] 0.1
["NU" "S" ] 0.9
["U" "NS"] 0.9
["NU" "NS"] 0.1}
:ks #{"S" "NS"}})
;; Viterbi algorithm tests
(t/deftest viterbi-folder-test
(t/is (= {["*" "NS"] 0.05, ["*" "S"] 0.45}
(:pis-prev (viterbi-folder {:qs (:qs sunny-world)
:es (:es sunny-world)
:pis-prev {["*" "*"] 1.0}
:bps []}
["NU" #{"*"} #{"*"} #{"S" "NS"}]))))
(t/is (= {["NS" "NS"] 0.0315,
["NS" "S"] 0.0015,
["S" "NS"] 0.12150000000000001,
["S" "S"] 0.0315}
(:pis-prev (viterbi-folder {:qs (:qs sunny-world)
:es (:es sunny-world)
:pis-prev {["*" "NS"] 0.05,
["*" "S"] 0.45}
:bps []}
["U" #{"*"} #{"S" "NS"} #{"S" "NS"}]
)))))
@si14
Copy link
Author

si14 commented Apr 1, 2013

Ссылка на мануал: http://www.cs.columbia.edu/~mcollins/hmms-spring2013.pdf (страница 18 особенно интересна)

  1. возможно, стоит к херам выпилить reducers (argmax-max станет проще);
  2. нифига не понятно, нельзя ли сделать возврат 2 seq'ов сразу проще (порнография в районе 19 строки)
  3. нифига не понятно, как умнее сделать ks (набор возможных тегов) — он зависит от idx, нужно либо делать fold по enumerated элементам, либо как-то более красиво подмешивать к элементам соответствующие наборы тегов (как сейчас сделано).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment