Skip to content

Instantly share code, notes, and snippets.

@jcromartie
Created June 13, 2011 19:31
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jcromartie/1023496 to your computer and use it in GitHub Desktop.
Save jcromartie/1023496 to your computer and use it in GitHub Desktop.
(ns markov
(use clojure.test))
;; ---------- basic markov chain utils and API ----------
(defn n-grams
"Partition seq s into all sequential groups of n items"
[n s]
(partition n 1 (repeat nil) s))
(defn scan-grams
"Return occurence map from sequeunce of n+1-grams"
[grams]
(reduce (fn [m gram]
(update-in m [(butlast gram)] conj (last gram)))
{}
grams))
(defn- markov-next
"Return the next markov chain based on the occurance structure,
n-gram count, and generated chain vector so far"
[occ n text-vec]
;; (println "text-vec" text-vec)
(let [last-n (seq (subvec text-vec (- (count text-vec) n)))
next-item (rand-nth (get occ last-n))]
(conj text-vec next-item)))
(defn markov-seq
"Infinite lazy markov chain from occurrence map."
([occ init-chain]
(let [init-chain-vec (vec init-chain)
n (-> occ keys first count)
gen-next-chain #(markov-next occ n %)
;; drop 1 to skip init-chain
chain-seq (drop 1 (iterate gen-next-chain init-chain))]
(take-while identity (concat init-chain (map last chain-seq)))))
([occ]
(let [init (-> occ keys rand-nth)]
(markov-seq occ init))))
;; ---------- "brain" API, for smarter text generation ----------
(defn make-brain
"Returns empty brain for text generation training."
[]
{:starts [] :occur {} :originals #{}})
(defn train
"Returns brain trained using n-grams of s. n defaults to 3."
([brain n s]
(let [s-n-grams (n-grams (inc n) s)]
(if (empty? s-n-grams)
brain
(-> brain
(update-in [:originals] conj s)
(update-in [:starts] conj (butlast (first s-n-grams)))
(update-in [:occur] #(merge-with into % (scan-grams s-n-grams)))))))
([brain s]
(train brain 3 s)))
(defn load-brain
"Returns brain trained with trigrams of each line in file at path"
[path]
(reduce train (make-brain) (-> path slurp (.split "\n"))))
(defn- speak-1
"Generate string with optional max length n. n defaults to 50"
([brain]
(let [init (vec (-> brain :starts rand-nth))
char-seq (markov-seq (:occur brain) init)
limit (:limit brain)]
(apply str (if limit (take limit char-seq) char-seq)))))
(defn speak
"Returns lazy seq of sentences generated by brain."
[brain]
(repeatedly #(speak-1 brain)))
(defn speak-unique
"Returns lazy seq of only sentences not present in original training
text."
[brain]
(remove (get brain :originals #{}) (speak brain)))
;; ---------- tests ----------
(deftest simple-text-generation
(let [s "this is a test"
brain (-> (make-brain) (train s))
text-seq (take 1000 (speak brain))]
(are [x] (some #{x} text-seq)
"this a test"
"this is is a test")
(is (some #{s} text-seq)
"Original in generated text")
(is (nil? (some #{s} (take 1000 (speak-unique brain))))
"Original NOT in generated *unique* text")))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment