Skip to content

Instantly share code, notes, and snippets.

@samanthadoran
Last active October 11, 2015 17:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samanthadoran/013c70cd429d3769d23b to your computer and use it in GitHub Desktop.
Save samanthadoran/013c70cd429d3769d23b to your computer and use it in GitHub Desktop.
Arbitrary-order Markov text generator
import tables
import sequtils, strutils, math
import random
proc readCorpusString(filename: string): seq[string] =
  ## Reads the text file `filename` and returns its words in order.
  ##
  ## Words are separated by any run of whitespace (spaces, tabs, line
  ## breaks), so the result never contains empty strings. If the file
  ## cannot be opened, an empty sequence is returned.
  result = newSeq[string]()
  var file: File
  if not open(file, filename):
    return
  # Close the handle on every exit path, including exceptions while reading.
  defer: file.close()
  # Iterate over the handle we opened rather than re-opening by name.
  for line in file.lines:
    for word in line.splitWhitespace():
      result.add(word)
proc makeTable(corpus: seq[string],
               order: int = 2): Table[string, seq[string]] =
  ## Builds the Markov transition table for `corpus`.
  ##
  ## Each key is `order` consecutive corpus words, lower-cased and joined
  ## with single spaces. Each value lists, in corpus order and with
  ## duplicates kept, every word that directly follows that key. The
  ## followers keep their original case.
  ##
  ## A corpus with `order` or fewer words yields an empty table.
  ## `order` is expected to be non-negative.
  result = initTable[string, seq[string]]()
  # `j` is the index of the follower word; it runs to the last corpus word
  # so that the final transitions are recorded as well.
  for j in countup(order, corpus.len - 1):
    var keyWords = newSeq[string]()
    # The `order` words immediately before `j` make up the key.
    for k in countup(j - order, j - 1):
      keyWords.add(corpus[k].toLowerAscii())
    let key = join(keyWords, " ")
    # Create the follower list on first sight, then append in place.
    result.mgetOrPut(key, newSeq[string]()).add(corpus[j])
proc generate(corpusSeq: seq[string], corpusTable: Table[string, seq[string]],
              maxWords: int, seedIndex: int = -1, order: int = 2): string =
  ## Generates at most `maxWords` words of Markov text, joined by spaces.
  ##
  ## `corpusSeq` is the word list and `corpusTable` maps a lower-cased,
  ## space-joined key of `order` words to the words that may follow it.
  ## `seedIndex` is the corpus index of the first word of the starting
  ## window. `-1`, or any index at which `order` words do not fit, selects
  ## a random start instead.
  ##
  ## Generation stops early when the current window has no entry (or an
  ## empty entry) in `corpusTable`. Returns `""` when `maxWords < 1`,
  ## `order < 1`, or the corpus holds fewer than `order` words.
  result = ""
  # Last index at which a full window of `order` words still fits.
  let lastStart = corpusSeq.len - order
  if maxWords < 1 or order < 1 or lastStart < 0:
    return

  # Re-seed the global random number generator before making any choices.
  randomize()

  let start =
    if seedIndex >= 0 and seedIndex <= lastStart:
      seedIndex
    else:
      # The very last window has no word after it in the corpus, so prefer
      # a start before it whenever one exists.
      rand(max(lastStart - 1, 0))

  # Sliding window holding the `order` most recent words.
  var window = corpusSeq[start ..< start + order]
  var generatedWords = newSeq[string]()
  for _ in 0 ..< maxWords:
    generatedWords.add(window[0])
    # Keys are just joined lowercase strings.
    let key = window.join(" ").toLowerAscii()
    # We can't continue if the window has no known follower.
    if not corpusTable.hasKey(key):
      break
    let followers = corpusTable[key]
    if followers.len == 0:
      break
    # Drop the oldest word and append a randomly chosen follower.
    window.delete(0)
    window.add(followers[rand(followers.high)])
  result = join(generatedWords, " ")
proc main() =
  ## Interactive driver: asks for a corpus file and a chain order, builds
  ## the table once, then repeatedly asks for a word count and an optional
  ## seed word and prints the generated text.
  ##
  ## The loop ends when standard input reaches end-of-file.
  echo("Enter the name of the corpus you would like to parse...")
  let corpusSeq = readCorpusString(readLine(stdin).strip())
  echo("Enter the order of the markov chain you would like(2 is most common)...")
  # strip() so surrounding whitespace does not make parseInt fail.
  let order = readLine(stdin).strip().parseInt()
  let corpusTable = makeTable(corpusSeq, order)
  var line = ""
  while true:
    echo("\n\n\n")
    echo("How many words would you like to generate?")
    # readLine returns false at end-of-file; leave the loop quietly.
    if not readLine(stdin, line):
      break
    var maxWords: int
    try:
      maxWords = line.strip().parseInt()
    except ValueError:
      echo("Please enter a whole number.")
      continue
    echo("Enter the word you would like to seed(Leave blank for random)")
    if not readLine(stdin, line):
      break
    let answer = line.strip()
    # -1 tells generate to choose a random starting position.
    var seedIndex = -1
    if answer != "":
      # Compare case-insensitively: the corpus keeps its original case.
      for i, word in corpusSeq:
        if cmpIgnoreCase(word, answer) == 0:
          seedIndex = i
          break
    echo("\n")
    echo(generate(corpusSeq, corpusTable, maxWords, seedIndex, order))

when isMainModule:
  main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment