This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Relative word frequencies for `pof`.
;; Tokenizes `pof` with `to-words` and returns a map of
;; word -> (occurrences of word / total number of words).
;; When there are no words the result is an empty map.
;; Memoized on `pof`, so each distinct argument is tokenized only once.
(def freqs-files
  (memoize
   (fn [pof]
     (let [words (to-words pof)
           total (count words)]
       ;; Build the map straight from [word ratio] pairs; no need to flatten
       ;; the pairs and re-pair them through `apply hash-map`. The per-word
       ;; tally is bound to `n` so clojure.core/count is not shadowed.
       (into {}
             (map (fn [[word n]] [word (/ n total)])
                  (frequencies words)))))))
(def freqs (memoize (fn [pof] | |
(if (instance? java.io.File pof) | |
(freqs-files pof) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Relative frequency of `word` among the words of `pof`:
;; (occurrences of word) / (total number of words).
;; Returns 0 when `word` does not occur, and also when `pof` yields no words
;; at all (instead of failing with a divide-by-zero ArithmeticException).
;; Memoized on the [pof word] argument pair.
(def freq-files
  (memoize
   (fn [pof word]
     ;; Tokenize once and reuse the result for both the lookup and the total.
     (let [words (to-words pof)
           total (count-m words)]
       (if (zero? total)
         0
         (/ (get (frequencies-m words) word 0) total))))))
;; Frequency of `word` for `pof`.
;; A java.io.File is handed to `freq-files`. Anything else is treated as a
;; pair: the frequency is computed recursively for its first and second
;; elements and the two results are combined by their unweighted mean.
;; Memoized on the [pof word] argument pair.
(def freq
  (memoize
   (fn [pof word]
     (if-not (instance? java.io.File pof)
       (let [left  (freq (first pof) word)
             right (freq (second pof) word)]
         ;; unweighted mean of the two members' frequencies
         (mean [left right]))
       (freq-files pof word)))))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns radical (:use [incanter.core :only (abs sq sqrt)] | |
[incanter.stats :only (mean)] | |
[clojure.contrib.combinatorics :only (combinations)])) | |
;; Memoized variants of clojure.core functions. `memoize` keeps a cache from
;; argument values to results, so calling one of these again with an equal
;; argument returns the cached result instead of recomputing it. The cache is
;; never cleared, so memory use grows with the number of distinct arguments.

;; cached `set`
(def set-m (memoize set)) | |
;; cached `sort`
(def sort-m (memoize sort)) | |
;; cached `flatten`
(def flatten-m (memoize flatten)) | |
;; cached `frequencies`
(def frequencies-m (memoize frequencies)) | |
;; cached `count`
(def count-m (memoize count)) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns hc (:use [incanter.core :only (abs sq sqrt)] | |
[incanter.stats :only (mean)] | |
[clojure.contrib.combinatorics] | |
[clojure.set])) | |
;; Turns a file, or an arbitrarily nested collection of files, into a seq of
;; lowercase words.
;;   file-tree - either a collection, which is flattened and whose leaves are
;;               each processed recursively, or a single item whose `str` form
;;               `slurp` can open (e.g. a java.io.File or a path string).
;; Returns the runs of the letters a-z found in the lowercased text, in order
;; of appearance (nil when a single item contains none). For a collection the
;; leaves' words are concatenated in leaf order.
(defn to-words [file-tree]
  (if (coll? file-tree)
    (mapcat to-words (flatten file-tree))
    ;; String.toLowerCase lowercases with the default locale, exactly as the
    ;; commons-lang StringUtils/lowerCase helper does for non-null input.
    (re-seq #"[a-z]+" (.toLowerCase ^String (slurp (str file-tree))))))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Merges two maps with separate rules for shared and unshared keys.
;;   m1, m2 - the maps to merge
;;   f      - two-argument function; a key present in both maps gets
;;            (f value-from-m1 value-from-m2)
;;   g      - one-argument function; a key present in only one map gets
;;            (g its-value)
;; Returns a map containing every key of m1 and m2.
(defn merge-general [m1 m2 f g]
  (let [unshared (fn [m other]
                   ;; entries of m whose key is absent from other, with g applied
                   (into {} (for [[k v] m
                                  :when (not (contains? other k))]
                              [k (g v)])))]
    (merge (merge-with f m1 m2)
           (unshared m1 m2)
           (unshared m2 m1))))
;; Example REPL session:
;; user> (merge-general {:a 1, :b 2, :c 3} {:a 4, :b 5, :d 6} * sq)
;; {:d 36, :a 4, :b 10, :c 9}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns hc (:use [incanter.core :only (abs sq sqrt)] | |
[incanter.stats :only (mean)] | |
[clojure.contrib.combinatorics :only (combinations)])) | |
;; Builds an rfo: a four-element vector laid out as
;; [score relfreqs interesting rfos-or-file].
;; Takes a single map with the keys :score, :relfreqs, :interesting and
;; :rfos-or-file; any key that is missing contributes nil to its slot.
(defn make-rfo [{s :score, rf :relfreqs, i :interesting, src :rfos-or-file}]
  (vector s rf i src))
;; Positional accessors for the four-element vector that `make-rfo` returns.

;; Element 0 of an rfo (nil when the rfo is empty).
(defn score [rfo]
  (first rfo))

;; Element 1 of an rfo (nil when the rfo has fewer than two elements).
(defn relfreqs [rfo]
  (second rfo))

;; Element 2 of an rfo; `nth` throws when the index is out of bounds.
(defn interesting [rfo]
  (nth rfo 2))

;; Element 3 of an rfo; `nth` throws when the index is out of bounds.
(defn rfos-or-file [rfo]
  (nth rfo 3))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns hc (:use [incanter.core :only (abs sq sqrt)] | |
[incanter.stats :only (mean)] | |
[clojure.contrib.combinatorics :only (combinations)])) | |
;; Configuration values, evaluated once when this file is loaded.

;; NOTE(review): presumably the number of "interesting" words kept per item;
;; confirm against the code that reads it.
(def *interesting-words-count* 3)

;; Root directory of the input corpus. Hard-coded, machine-specific path.
(def *directory-string* "/Users/herdrick/Dropbox/clojure/hierarchical-classifier/data/mixed")

;; Files found under *directory-string*. The nil extensions argument means no
;; extension filter and `true` asks for a recursive listing. `seq` makes the
;; value nil when the listing is empty.
(def *txt-files*
  (let [root (java.io.File. *directory-string*)]
    (seq (org.apache.commons.io.FileUtils/listFiles root nil true))))
(def file->seq (memoize (fn [file] | |
(re-seq #"[a-z]+" |