Skip to content

Instantly share code, notes, and snippets.

@honzabrecka honzabrecka/bayesian.cljs
Last active Feb 20, 2017

Embed
What would you like to do?
Bayesian classifier implemented in Clojure (well, it's actually in ClojureScript).
;------------------------------------------------
; bayesian
(def empty-training-set
  "A training set with no observations: no per-feature counts and no
  per-category counts."
  {:categories {}
   :features   {}})
(defn train
  "Builds a training set from one document.

  `features` is a collection of features found in the document and
  `category` is the document's label. The result records the category
  with a count of 1 and maps every feature to `{category 1}`."
  [features category]
  (let [seed      (assoc-in empty-training-set [:categories category] 1)
        seen-once {category 1}]
    (reduce (fn [acc feature]
              (assoc-in acc [:features feature] seen-once))
            seed
            features)))
(defn merge-trainings
  "Combines two training sets by adding their counts.

  Top-level sections present in both sets are merged key by key: values
  that are maps (per-feature category counts) are merged with `+`, and
  plain numbers (category counts) are added."
  [a b]
  (letfn [(combine-entry [x y]
            (if (map? x)
              (merge-with + x y)
              (+ x y)))
          (combine-section [left right]
            (merge-with combine-entry left right))]
    (merge-with combine-section a b)))
(defn- inv-chi2
  "Evaluates the series e^(-m) * sum of m^i / i! for i from 0 up to
  floor(df / 2) - 1, where m = chi / 2, and caps the result at 1."
  [chi, df]
  (let [half-chi (* chi 0.5)
        n-terms  (.floor js/Math (* df 0.5))
        ;; Terms in increasing order: the first is e^(-m); each following
        ;; term is the previous one multiplied by m / i.
        series   (reductions (fn [prev i] (* prev (/ half-chi i)))
                             (.exp js/Math (- half-chi))
                             (range 1 n-terms))]
    ;; Add from the last term back to the first.
    (min (reduce + 0 (reverse series)) 1)))
(defn- get-categories
  "Returns the category names recorded in `training-set` (the keys of its
  `:categories` map), or nil when there are none."
  [training-set]
  (-> training-set
      :categories
      keys))
(defn count-feature
  "Returns how many times `feature` was recorded under `category` in
  `training-set`.

  Always returns a number: 0 when the category is unknown, when the
  feature is unknown, or when the feature is known but was never seen in
  this category."
  [training-set feature category]
  (if (contains? (:categories training-set) category)
    ;; The default of 0 covers both a missing feature and a feature that
    ;; has no entry for this category; without it the lookup yields nil.
    (get-in training-set [:features feature category] 0)
    0))
(defn count-category
  "Returns the count recorded for `category` in `training-set`, or 0 when
  the category has not been recorded."
  [training-set category]
  (get-in training-set [:categories category] 0))
(defn count-total
  "Returns the sum of all category counts in `training-set` (0 when no
  category has been recorded)."
  [training-set]
  (->> training-set
       :categories
       vals
       (apply +)))
(defn feature-probability
  "Returns the count of `feature` in `category` divided by the count of
  `category`, or 0 when the category count is 0."
  [training-set feature category]
  (let [in-category   (count-category training-set category)
        feature-count (count-feature training-set feature category)]
    (if (not= in-category 0)
      (/ feature-count in-category)
      0)))
(defn category-probability
  "Returns the feature probability of `feature` in `category` divided by
  the sum of its feature probabilities over every category in
  `training-set`. Returns 0 when the feature probability in `category`
  is 0."
  [training-set feature category]
  (let [prob-in (fn [cat] (feature-probability training-set feature cat))
        own     (prob-in category)]
    (if (= own 0)
      0
      (/ own
         (reduce + 0 (map prob-in (get-categories training-set)))))))
(defn weighted-probability
  "Blends the probability computed by `f` with a starting value of 0.5.

  `f` is called as `(f training-set feature category)`. The result is
  (0.5 + totals * probability) / (totals + 1), where `totals` is the
  count of `feature` summed over all categories, so a feature that was
  never counted yields 0.5."
  [training-set feature category f]
  (let [totals      (reduce + 0 (map (fn [cat]
                                       (count-feature training-set feature cat))
                                     (get-categories training-set)))
        probability (f training-set feature category)
        assumed     0.5
        numerator   (+ assumed (* totals probability))
        denominator (+ totals 1)]
    (/ numerator denominator)))
(defn document-probability
  "Returns the product, over every feature in `features`, of its weighted
  feature probability for `category`. An empty `features` yields 1."
  [training-set features category]
  (->> features
       (map (fn [feature]
              (weighted-probability training-set feature category feature-probability)))
       (reduce * 1)))
(defn naive-probability
  "Returns the document probability of `features` for `category`
  multiplied by the category's share of all recorded counts
  (category count / total count).

  Returns 0 when the training set holds no counts at all, instead of
  dividing 0 by 0."
  [training-set features category]
  (let [total (count-total training-set)
        ;; Guard the empty training set: 0 / 0 would give NaN.
        prior (if (= total 0)
                0
                (/ (count-category training-set category) total))]
    (* (document-probability training-set features category)
       prior)))
(defn fisher-probability
  "Scores `features` against `category`: computes -2 times the sum of the
  natural logs of each feature's weighted category probability, and
  passes that to `inv-chi2` with twice the number of features as the
  second argument.

  The logs are summed rather than taking the log of the product, so many
  small probabilities cannot underflow to 0 (which would make the log
  -Infinity)."
  [training-set features category]
  (let [log-sum (reduce + 0
                        (map (fn [feature]
                               (js/Math.log
                                (weighted-probability training-set feature category category-probability)))
                             features))]
    (inv-chi2 (* -2 log-sum)
              (* 2 (count features)))))
(defn clasify
  "Returns a map from every category in `training-set` to the score
  `(f training-set features category)`. An empty training set yields an
  empty map."
  [f training-set features]
  (into {}
        (for [category (get-categories training-set)]
          [category (f training-set features category)])))
;------------------------------------------------
; use
(defn get-words
  "Splits `sentence` into distinct lower-cased words.

  Words are separated by any run of whitespace (spaces, tabs, newlines).
  Only tokens longer than 1 and shorter than 20 characters are kept."
  [sentence]
  ;; Split on a whitespace regex rather than a single literal space so
  ;; tabs and newlines also separate words; empty tokens fail the length
  ;; filter below.
  (->> (str/split sentence #"\s+")
       (filter #(let [l (count %)]
                  (and (> l 1) (< l 20))))
       (map str/lower-case)
       (distinct)))
(def raw-data
  "Example labelled sentences, each a [sentence category] pair."
  [["hey Honza what are you doing" "good"]
   ["make quick money now" "bad"]
   ["watch this quick brown fox" "good"]])
(def trained-data
  "Training set obtained by training on each entry of `raw-data` and
  merging the results, starting from the empty training set."
  (->> raw-data
       (map (fn [[sentence category]]
              (train (get-words sentence) category)))
       (reduce merge-trainings empty-training-set)))
;; Score an unseen sentence against every trained category.
(->> "do you want to make some quick money"
     get-words
     (clasify fisher-probability trained-data))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.