Skip to content

Instantly share code, notes, and snippets.

@martoo6
Created October 28, 2016 21:28
Show Gist options
  • Save martoo6/e5e19552e6e7149becf7896b98651c4d to your computer and use it in GitHub Desktop.
Save martoo6/e5e19552e6e7149becf7896b98651c4d to your computer and use it in GitHub Desktop.
Markov Chain N-gram
// Read the whole corpus into memory; the handle is closed even if reading fails.
val file = scala.io.Source.fromFile("quijote.txt")
val txt = try file.mkString finally file.close()

// Tokenise the corpus into words.
// - "[ *]" is applied as a regular expression (String.replace would only match the
//   literal five-character text), so stray asterisks become separators.
//   NOTE(review): assumes the pattern was meant as a character class - confirm.
// - Splitting on runs of whitespace (covering "\n", "\r" and tabs) and dropping empty
//   tokens keeps "" out of the table, where it would otherwise appear both as a key
//   and as a successor.
val splited = txt.replaceAll("[ *]", " ").split("\\s+").filter(_.nonEmpty)

// Bigram table: each word maps to every word that directly followed it in the corpus.
// Successors are prepended, so each list is ordered most-recent occurrence first, and
// duplicates are kept so that frequent successors are proportionally more likely.
val wgram = scala.collection.mutable.HashMap[String, List[String]]()
splited.zip(splited.drop(1)).foreach { case (word, next) =>
  wgram(word) = next :: wgram.getOrElse(word, Nil)
}

// Shared random source used by the generator functions.
val r = scala.util.Random
/** Picks a random word suitable for starting a sentence.
  *
  * A candidate is a key of `wgram` that is non-empty, begins with an upper-case
  * letter and ends with a letter (i.e. carries no trailing punctuation).
  * One candidate is drawn uniformly at random using `r`.
  *
  * @return a randomly chosen starting word
  * @throws NoSuchElementException if no key of `wgram` qualifies
  */
def getFirstWord: String = {
  // Filter up front instead of retrying blindly: this never calls charAt(0) on an
  // empty token, never excludes keys by index arithmetic, and cannot spin forever
  // when nothing qualifies.
  val candidates = wgram.keysIterator.filter { w =>
    w.nonEmpty && w.head.isLetter && w.head.isUpper && w.last.isLetter
  }.toVector
  if (candidates.isEmpty)
    throw new NoSuchElementException("no capitalised word available to start a sentence")
  candidates(r.nextInt(candidates.size))
}
/** Generates text by walking the bigram table `wgram` from a starting word.
  *
  * Each step picks a random recorded successor of the current word (using `r`).
  * Everything after `start` is lower-cased.
  *
  * @param start          first word of the text; defaults to a random capitalised word
  * @param words          number of further words to append before trying to stop
  * @param endPunctuation when true, keep walking past `words` until a successor ending
  *                       in ".", "!" or "?" is available, and finish on that word
  * @return the generated text, words separated by single spaces
  */
def getText(start: String = getFirstWord, words: Int, endPunctuation: Boolean = false): String = {
  // A word closes a sentence when it carries terminal punctuation.
  def endsSentence(s: String): Boolean =
    s.endsWith(".") || s.endsWith("!") || s.endsWith("?")

  // A word may have no recorded successor (e.g. the very last corpus word), so never
  // index the table directly; an empty list means the chain is at a dead end.
  val lst = wgram.getOrElse(start, Nil)
  val budgetSpent = words <= 0

  if (lst.isEmpty || (budgetSpent && !endPunctuation)) {
    start
  } else {
    // Once the budget is spent, prefer a successor that closes the sentence.
    val closing = if (budgetSpent) lst.find(endsSentence) else None
    closing match {
      case Some(last) =>
        // Keep the current word too, so the chain has no gap.
        s"$start ${last.toLowerCase}"
      case None =>
        val word = lst(r.nextInt(lst.size))
        s"$start ${getText(word, words - 1, endPunctuation).toLowerCase}"
    }
  }
}
// Print ten generated texts, each starting from a random capitalised word, with
// ten further words requested and generation continuing until a sentence-ending word.
for (_ <- 1 to 10)
  println(getText(words = 10, endPunctuation = true))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment