Skip to content

Instantly share code, notes, and snippets.

Created August 20, 2014 17:22
Show Gist options
  • Save triclops200/99a085d6067ebe1646af to your computer and use it in GitHub Desktop.
Save triclops200/99a085d6067ebe1646af to your computer and use it in GitHub Desktop.
(defn increment-counter
  "Returns `res` with the count stored under `word` raised by one.
  A missing (or nil) count is treated as 0, so an unseen word ends up at 1."
  [res word]
  (let [bump    (fnil inc 0)
        current (get res word)]
    (assoc res word (bump current))))
(defn increment-link
  "Records one more occurrence of `word2` following `word1`.
  `dict` maps a word to a counter map of the words seen after it; the
  counter for `word1` is bumped for `word2` and the updated dict returned."
  [dict word1 word2]
  (let [followers (get dict word1)]
    (assoc dict word1 (increment-counter followers word2))))
(defn split-text
  "Splits `text` into raw sentence fragments on sentence-level punctuation
  (period, bang, question mark, greater-than, dash, double quote, asterisk
  and parentheses) and drops every fragment that is empty or whitespace-only.

  Empty fragments appear whenever two delimiters are adjacent or the text
  starts with a delimiter; they must be removed here, otherwise they turn
  into sentences that contain no words at all."
  [text]
  ;; blank? is true for both the empty string and whitespace-only strings.
  (remove clojure.string/blank?
          (clojure.string/split text #"[.!?>\-\"\*\)\(]")))
(defn fix-sentences
  "Wraps every fragment in sentence markers: the trimmed fragment is
  prefixed with the start token `>` and suffixed with the end token `.`,
  each separated from the text by a single space. Returns a lazy seq."
  [sentences]
  (for [raw sentences]
    (str "> " (clojure.string/trim raw) " .")))
(defn split-sentences
  "Lower-cases each sentence and splits it into a vector of tokens on
  whitespace runs. Sentences that yield no tokens at all are discarded.
  Returns a lazy seq of token vectors."
  [sentences]
  (->> sentences
       (map (fn [sentence]
              (-> sentence
                  clojure.string/lower-case
                  (clojure.string/split #"\s+| +"))))
       (remove empty?)))
(defn zip
  "Walks the given collections in lockstep and returns a lazy seq of
  vectors, one per position, stopping at the end of the shortest one."
  [& colls]
  (let [tuple (fn [& items] (vec items))]
    (apply map tuple colls)))
(defn slurp-sentence
  "Folds one tokenised sentence into `dict`: every pair of adjacent tokens
  (word, follower) is recorded as a link via `increment-link`."
  [dict sentence]
  (let [pairs (zip sentence (rest sentence))]
    (reduce (fn [acc [word follower]]
              (increment-link acc word follower))
            dict
            pairs)))
(defn slurp-text
  "Builds the word-transition dictionary for `text`.
  The text is cut into fragments, each fragment is wrapped in the `>` / `.`
  sentence markers and tokenised, and every resulting sentence is folded
  into an initially empty map with `slurp-sentence`."
  [text]
  (reduce slurp-sentence
          {}
          ;; Pipeline order matters: fragments -> marked sentences -> tokens.
          (-> text
              split-text
              fix-sentences
              split-sentences)))
(defn get-sum-of-entries
  "Adds up the second element (the count) of every entry in `entries`,
  starting from 0. `entries` is any seqable of pairs, e.g. a counter map."
  [entries]
  (reduce + 0 (map second entries)))
(defn chooser
  "Reducing step for weighted selection.
  The accumulator is [chosen-word remaining]; each entry is [word weight].
  Once `remaining` has dropped to zero or below the accumulator is passed
  through untouched. Otherwise the entry's weight is subtracted, and the
  entry's word becomes the chosen one if that subtraction exhausts the
  remainder."
  [[bestword sum] [nextword chance]]
  (cond
    ;; A word has already been selected; ignore all later entries.
    (<= sum 0) [bestword sum]
    :else
    (let [remaining (- sum chance)
          winner    (if (<= remaining 0) nextword bestword)]
      [winner remaining])))
(defn get-random-entry-by-count
  "Picks a key from `entries` at random, weighted by its count.
  A roll between 1 and the total count (inclusive) is drawn and walked
  through the entries with `chooser`; the first entry's key is the initial
  candidate. Returns the chosen key."
  [entries]
  (let [total (get-sum-of-entries entries)
        roll  (inc (rand-int total))
        start [(ffirst entries) roll]]
    (->> entries
         (reduce chooser start)
         first)))
(defn build-sentence
  "Generates one random sentence from the transition dictionary `dict`.

  A starting word is drawn from the followers of the start token `>`; a
  draw that is empty or contains whitespace is rejected and retried. From
  there each next word is drawn from the followers of the previous word
  until the end token `.` is drawn or the sentence holds more than 25
  words. The words are joined with single spaces and a period is appended.

  NOTE(review): the retry has no bound, so this does not return if `dict`
  never yields an acceptable starting word (for example an empty dict)."
  [dict]
  (let [first-word (get-random-entry-by-count (dict ">"))]
    (if (or (empty? first-word) (re-matches #".*\s+.*" first-word))
      (recur dict)
      (let [sentence
            (loop [prev-word first-word
                   sentence [first-word]]
              (let [next-word (get-random-entry-by-count (dict prev-word))]
                (if (or (> (count sentence) 25) (= next-word "."))
                  ;; Stop condition met: hand back the words collected so far.
                  sentence
                  (recur next-word (conj sentence next-word)))))]
        (->> sentence
             (clojure.string/join " ")
             (#(str % ".")))))))
(defn build-paragraph
  "Generates `n` sentences with `build-sentence` from `dict` and joins
  them into one string separated by single spaces."
  [dict n]
  (->> (repeatedly n (fn [] (build-sentence dict)))
       (clojure.string/join " ")))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment