Skip to content

Instantly share code, notes, and snippets.

@yagays
Created February 3, 2012 05:21
Show Gist options
  • Save yagays/1728282 to your computer and use it in GitHub Desktop.
Save yagays/1728282 to your computer and use it in GitHub Desktop.
naive-bayes(clojure)
;; Training data: each inner vector is the word list of one document.
;; Documents used as the :positive class by `classify`.
(def text_classified_p
  (list ["good" "bad" "good" "good"]
        ["exciting" "exciting"]
        ["good" "good" "exciting" "boring"]))
;; Documents used as the :negative class by `classify`.
(def text_classified_n
  (list ["bad" "boring" "boring" "boring"]
        ["bad" "good" "bad"]
        ["bad" "bad" "boring" "exciting"]))
;; Multivariate Bernoulli model
(defn train
  "Builds a presence map for one document: every distinct word in
  `features` maps to 1, no matter how many times it occurs."
  [features]
  (zipmap features (repeat 1)))
(defn count-wordset
  "Runs `train` on every document in `training-data` and merges the
  resulting maps, adding together the values of words that appear in more
  than one document. Returns nil when `training-data` is empty."
  [training-data]
  (let [per-document (map train training-data)]
    (reduce (partial merge-with +) per-document)))
(defn mBm-MLestimate
  "Maximum-likelihood score of the word vector `documents` under the
  multivariate Bernoulli model for the class whose training documents are
  `datasets`.

  With N = (count datasets) and df(w) = number of documents in `datasets`
  that contain w, the result is

    prior * product over distinct words w in `documents` of df(w)/N
          * product over the class's other known words w of (1 - df(w)/N)

  where prior is N divided by the total number of documents in
  `text_classified_p` and `text_classified_n`.

  Document frequencies are computed locally, so the result does not depend
  on which of the file's two `train` / `count-wordset` definitions is
  currently bound."
  [documents datasets]
  (let [n-docs   (count datasets)
        prior    (/ n-docs
                    (count (concat text_classified_p text_classified_n)))
        ;; De-duplicate each document first so a repeated word counts its
        ;; document only once; computed a single time for both products.
        doc-freq (frequencies (mapcat distinct datasets))
        present  (distinct documents)
        absent   (vals (apply dissoc doc-freq present))]
    (if (zero? n-docs)
      ;; No training documents: the prior is already 0; avoid dividing by N.
      prior
      (* prior
         ;; A word never seen in this class has df = 0 and zeroes the score,
         ;; instead of being silently skipped (i.e. treated as probability 1).
         (apply * (map #(/ (get doc-freq % 0) n-docs) present))
         (apply * (map #(- 1 (/ % n-docs)) absent))))))
(defn classify
  "Scores the word vector `d` against both training sets with
  `mBm-MLestimate` and returns a sorted map from class keyword
  (:negative, :positive) to that class's score."
  [d]
  (let [positive-score (mBm-MLestimate d text_classified_p)
        negative-score (mBm-MLestimate d text_classified_n)]
    (into (sorted-map)
          [[:negative negative-score]
           [:positive positive-score]])))
;; Multinomial model
(defn train
  "Counts word occurrences in one document: returns a map from each word
  in `features` to the number of times it appears."
  [features]
  (frequencies features))
(defn count-wordset
  "Applies `train` to each document of `training-data` and sums the
  per-document maps word by word. Returns nil for empty `training-data`."
  [training-data]
  (->> training-data
       (map train)
       (apply merge-with +)))
(defn mm-MLestimate
  "Maximum-likelihood score of the word vector `documents` under the
  multinomial model for the class whose training documents are `datasets`.

  Returns prior * product over every word w in `documents` (repeats
  included) of count(w)/T, where count(w) is the number of occurrences of w
  across `datasets`, T is the total number of word occurrences in
  `datasets`, and prior is (count datasets) divided by the total number of
  documents in `text_classified_p` and `text_classified_n`.

  A word that never occurs in `datasets` contributes probability 0 (the
  plain ML estimate) instead of raising a NullPointerException."
  [documents datasets]
  (let [prior       (/ (count datasets)
                       (count (concat text_classified_p text_classified_n)))
        all-words   (apply concat datasets)
        ;; Counts and total are computed once, not once per document word.
        word-counts (frequencies all-words)
        n-words     (count all-words)
        word-prob   (fn [w]
                      ;; Guard the empty-class case so we never divide by 0.
                      (if (zero? n-words)
                        0
                        (/ (get word-counts w 0) n-words)))]
    (* prior
       (apply * (map word-prob documents)))))
(defn classify
  "Scores the word vector `d` against both training sets with
  `mm-MLestimate` and returns a map from score to class keyword, sorted by
  descending score so the best class comes first.

  NOTE: scores are the map keys, so when both classes get the same score
  the map holds a single entry whose value is :negative."
  [d]
  (let [positive-score (mm-MLestimate d text_classified_p)
        negative-score (mm-MLestimate d text_classified_n)]
    (-> (sorted-map-by >)
        (assoc positive-score :positive)
        (assoc negative-score :negative))))
;; Usage example: classify a four-word document.
(classify ["good" "good" "bad" "boring"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment