Created
February 3, 2017 15:23
-
-
Save triss/d965d6d6b572cd8c06cec6325e75193a to your computer and use it in GitHub Desktop.
Markov models in Clojure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns markov.core | |
(:require [clojure.java.io :as io] | |
[clojure.string :as str] | |
[clojure.pprint :refer [pprint]])) | |
(defn count-n-gram
  "Returns `m` with the counter stored at the key path `n-gram` incremented
  by one. A path that has not been seen before starts counting from zero,
  so its first occurrence is recorded as 1."
  [m n-gram]
  ;; fnil substitutes 0 for a missing (nil) counter before inc is applied.
  (update-in m n-gram (fnil inc 0)))
(defn next-word
  "Given a nested map of n-gram counts and a context `n-gram` (a sequence of
  words used as a key path into `n-grams`), randomly selects the next word.

  Each candidate word is chosen with probability proportional to the count
  stored for it under the context. Returns nil when the context has no
  recorded successors (for example, a context that only occurs at the very
  end of the training text) instead of throwing."
  [n-grams n-gram]
  ;; seq turns an empty or missing successor map into nil so `when-let`
  ;; short-circuits; the original crashed here by calling (max) with no args.
  (when-let [successors (seq (get-in n-grams n-gram))]
    (let [total (reduce + (map val successors))
          ;; r is uniform over [0, total); the word whose cumulative count
          ;; range contains r is selected.
          r     (rand-int total)]
      (loop [[[word cnt] & more] successors
             seen 0]
        (let [seen (+ seen cnt)]
          ;; The (nil? more) guard only matters if every count is zero:
          ;; fall back to the last candidate rather than running off the end.
          (if (or (< r seen) (nil? more))
            word
            (recur more seen)))))))
(defn next-n-gram
  "Given a map of n-gram counts and the current n-gram, builds the following
  n-gram: the current one with its first word dropped and a word picked by
  `next-word` appended. The result is always a vector."
  [n-grams n-gram]
  (let [following (next-word n-grams n-gram)]
    (-> n-gram
        rest
        vec
        (conj following))))
(defn n-grams->sentence
  "Joins a sequence of overlapping n-grams into a single space-separated
  string: every word of the first n-gram, followed by only the last word of
  each subsequent n-gram."
  [coll]
  (let [[head & tail] coll
        trailing      (map last tail)]
    (str/join " " (concat head trailing))))
;;;; Load the training text and split it into tokens

;; Reads the classpath resource testFile.txt, lower-cases it and tokenises it:
;; each run of lower-case letters is one token and each punctuation character
;; is its own token. Anything else (whitespace, digits, ...) is discarded.
(def words
  (re-seq #"\p{Lower}+|\p{Punct}"
          (str/lower-case
           (slurp (io/resource "testFile.txt")))))
;;;; Build the bigram Markov model

;; Nested map word -> following word -> number of times that pair of
;; adjacent tokens occurs in `words`.
(def bigrams
  (reduce count-n-gram {} (partition 2 1 words)))
;;;; Generate a sentence from the bigram model

;; Starts from the one-word state ["the"], repeatedly steps the chain with
;; next-n-gram, keeps the first 20 states and joins them into a string.
(n-grams->sentence
 (take 20
       (iterate (partial next-n-gram bigrams) ["the"])))
;;;; Build the trigram Markov model

;; Nested map word -> second word -> third word -> number of times that
;; run of three adjacent tokens occurs in `words`.
(def trigrams
  (reduce count-n-gram {} (partition 3 1 words)))
;;;; Generate a sentence from the trigram model

;; Starts from the two-word state ["the" "cat"], repeatedly steps the chain
;; with next-n-gram, keeps the first 20 states and joins them into a string.
(n-grams->sentence
 (take 20
       (iterate (partial next-n-gram trigrams) ["the" "cat"])))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment