Skip to content

Instantly share code, notes, and snippets.

@pleasetrythisathome
Created April 14, 2015 21:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pleasetrythisathome/6de80119a961a427f6a5 to your computer and use it in GitHub Desktop.
Save pleasetrythisathome/6de80119a961a427f6a5 to your computer and use it in GitHub Desktop.
markov chains
(defn markov-data
  "Builds a nested transition-count table from a sequence of tokens.

  Every adjacent pair (w, next-w) in `words` increments the counter stored
  at [w next-w], so the result has the shape {w {next-w count}}.
  Fewer than two tokens yields an empty map."
  [words]
  (let [bump  (fnil inc 0) ; a missing counter starts at 0 before incrementing
        tally (fn [table [current following]]
                (update-in table [current following] bump))]
    ;; (partition 2 1 ...) slides a window of two tokens, one step at a time.
    (reduce tally {} (partition 2 1 words))))
(defn markers
  "Tokenises `line` on whitespace and wraps the tokens in :start / :end
  sentinels, e.g. \"a b\" => (:start \"a\" \"b\" :end).

  Tokens are the maximal runs of non-whitespace characters, so leading,
  trailing or repeated whitespace never produces an empty-string token.
  A blank line yields just (:start :end)."
  [line]
  ;; re-seq returns nil when nothing matches; concat treats nil as empty.
  (concat [:start]
          (re-seq #"\S+" line)
          [:end]))
(defn make-lines
  "Splits `text` into chunks on the literal separator \". \" and returns
  the concatenation of `markers` applied to every chunk."
  [text]
  (let [chunks (str/split text #"\. ")]
    (mapcat markers chunks)))
(defn wrand
  "Weighted random pick: given a collection of non-negative weights,
  returns an index i chosen with probability weight-i / sum-of-weights.

  `slices` may be any seqable collection (vector, list, lazy seq); it is
  copied into a vector so the weights can be looked up by index.
  An empty collection, or one whose weights sum to zero, has no valid
  index and fails with an index-out-of-bounds exception."
  [slices]
  (let [weights (vec slices)
        total   (reduce + weights)
        r       (rand total)]
    ;; Walk the cumulative sums until the running total exceeds r.
    (loop [i 0 sum 0]
      (let [upper (+ (weights i) sum)]
        (if (< r upper)
          i
          (recur (inc i) upper))))))
(defn lazy-lines
  "Returns a lazy sequence of the `markers` tokens of every line of `file`.

  The reader is opened as soon as this function is called. It is closed
  only when the sequence is realised up to end-of-file; a partially
  consumed sequence leaves the reader open."
  [file]
  (let [rdr  (clojure.java.io/reader file)
        step (fn step []
               (lazy-seq
                (if-let [line (.readLine rdr)]
                  (concat (markers line) (step))
                  ;; End of input: release the reader and end the sequence.
                  (do (.close rdr) nil))))]
    (step)))
(defn generate-sentence
  "Random-walks the transition table `data` (shape {w {next-w count}})
  from :start until :end is drawn, and returns the visited words joined
  with single spaces.

  Each successor is drawn with probability proportional to its count.
  If the current word has no recorded successors (including a table that
  lacks :start) the walk stops and the words collected so far are returned."
  [data]
  (loop [ws  (data :start)
         acc []]
    (if (empty? ws)
      ;; Dead end: nothing to sample from, so finish the sentence here.
      (clojure.string/join " " acc)
      (let [i (wrand (vec (vals ws)))
            ;; keys and vals of the same map are in matching order, so the
            ;; index drawn over the counts selects the corresponding word.
            w (nth (keys ws) i)]
        (if (= :end w)
          (clojure.string/join " " acc)
          ;; conj onto the vector; repeatedly wrapping acc in concat would
          ;; nest lazy seqs and can overflow the stack on long sentences.
          (recur (data w) (conj acc w)))))))
(defn generate
  "Builds a transition table from the string `input` and returns an
  infinite lazy sequence of sentences generated from it.

  `input` is split on the literal separator \". \"; each chunk is
  tokenised with `markers`, and the combined token stream is fed to
  `markov-data`."
  [input]
  (let [chunks (str/split input #"\. ")
        tokens (mapcat markers chunks)
        table  (markov-data tokens)]
    (repeatedly (fn [] (generate-sentence table)))))
(defn strip-html-tags
  "Parses the string `s` with Jsoup and returns the text of the parsed
  document."
  [s]
  (-> s
      Jsoup/parse
      .text))
;; Five generated sentences joined with ". ", built from the text fetched at
;; the URL below when this form is loaded (network access at load time).
;; NOTE(review): despite the name, the URL points at a Project Gutenberg
;; text, not Hacker News — confirm the intended corpus.
;; NOTE(review): `http/get` is assumed to return a derefable response map
;; with a :body key (the original code already relied on this) — confirm
;; against the HTTP client in use.
(def hacker-news
  (->> "http://www.gutenberg.org/cache/epub/100/pg100.txt"
       http/get
       deref
       :body
       strip-html-tags
       ;; `generate` takes the raw text and builds the transition table
       ;; itself; passing it the output of `markov-data` (a map) would make
       ;; its string split fail.
       generate
       (take 5)
       (str/join ". ")))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment