Skip to content

Instantly share code, notes, and snippets.

@aruneko
Created July 3, 2017 06:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aruneko/783b7cd33eb1bbdb6ae3f39616585a03 to your computer and use it in GitHub Desktop.
Save aruneko/783b7cd33eb1bbdb6ae3f39616585a03 to your computer and use it in GitHub Desktop.
n-gramを作ってランダムに文章を生成するマルコフもどき
package net.aruneko.markov
import scala.annotation.tailrec
import scala.io.Source
import scala.util.Random
/**
 * Toy "Markov-like" sentence generator.
 *
 * Builds a table of character n-grams from a text resource and then glues
 * randomly chosen n-grams together until one containing a full stop is drawn.
 */
object Main {

  /** Full stop that marks the end of a generated sentence. */
  private val SentenceEnd = "。"

  /**
   * Reads `kokoro.txt` from the classpath, splits every non-empty line into
   * character trigrams, counts how often each distinct trigram occurs and
   * prints one randomly generated sentence to standard output.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val source = Source.fromResource("kokoro.txt")
    val trigrams =
      try {
        source
          .getLines()
          .filter(_.nonEmpty)
          .flatMap(makeNGram(3, _))
          // Materialise everything before the source is closed below.
          .toArray
          .groupBy(identity)
          .values
          .map(grams => (grams.length, grams.head))
          .toArray
      } finally {
        // The underlying stream must be released even if reading fails.
        source.close()
      }
    val rand = new Random()
    val sentence = buildSentence(rand, trigrams)
    println(sentence)
  }

  /**
   * Splits `str` into its overlapping character n-grams, in left-to-right order.
   *
   * A string that is not longer than `n` (including the empty string) is
   * returned unchanged as the single element of the result.
   *
   * @param n   window width in characters; must be positive
   * @param str text to split
   * @return every window of `n` consecutive characters of `str`
   * @throws IllegalArgumentException if `n` is not positive
   */
  def makeNGram(n: Int, str: String): List[String] = {
    require(n > 0, s"n must be positive, but was $n")
    if (str.length <= n) {
      List(str)
    } else {
      // Index-based windows avoid re-copying the remaining text on each step.
      List.tabulate(str.length - n + 1)(start => str.substring(start, start + n))
    }
  }

  /**
   * Concatenates uniformly random n-grams until one containing "。" has been
   * appended, and returns the accumulated text.
   *
   * The count stored in each entry is not consulted: every distinct n-gram is
   * equally likely to be drawn.
   *
   * @param rand  source of randomness; one `nextInt` call is made per n-gram
   * @param nGram table of `(occurrence count, n-gram)` entries to draw from
   * @return the generated text, ending with the first drawn n-gram that contains "。"
   * @throws IllegalArgumentException if `nGram` is empty or no entry contains
   *                                  "。" (generation could never finish)
   */
  def buildSentence(rand: Random, nGram: Array[(Int, String)]): String = {
    require(nGram.nonEmpty, "nGram must not be empty")
    require(
      nGram.exists(_._2.contains(SentenceEnd)),
      s"nGram must contain at least one entry with '$SentenceEnd', otherwise generation never terminates"
    )

    @tailrec
    def inner(sentence: StringBuilder): String = {
      val (_, nextString) = nGram(rand.nextInt(nGram.length))
      sentence.append(nextString)
      if (nextString.contains(SentenceEnd)) {
        sentence.toString
      } else {
        inner(sentence)
      }
    }

    inner(new StringBuilder)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment