Skip to content

Instantly share code, notes, and snippets.

@ChrisPritchard
Created November 12, 2019 07:34
Show Gist options
  • Save ChrisPritchard/49625ff19ff027f4cb477c69a25b8bec to your computer and use it in GitHub Desktop.
Save ChrisPritchard/49625ff19ff027f4cb477c69a25b8bec to your computer and use it in GitHub Desktop.
A simple Markov chain implementation, generic enough to support any token type (but built and demonstrated with string / sentence samples).
// Training corpus for the Markov chain: one sentence per list entry.
// Every sentence ends with a period; the script further down splits each
// sentence on spaces and treats a word ending in "." as a terminal token.
let samples = [
    "I am a monster."
    "I am a rock star."
    "I want to go to Hawaii."
    "I want to eat a hamburger."
    "I have a really big headache."
    "FSharp is a fun language."
    "Go eat a big hamburger."
    "Markov chains are fun to use."
    "What a wonderful day."
    "It is a good day to die."
    "In this example it generates a random fortune, modeled from the goedel fortunes contained in the famous fortune-mod package."
    "markov.sh is extremely fast, even on relatively large data sets (millions of lines)."
    "At first, mrkwords.sh will pick a random line from the model and pick the first word of the pair as the first word of our output message."
    "After this, it will filter the model to find what word pairs start with the first word it picked."
    "Let’s say it picked the word hello as the first word of the message."
    "It will then randomly choose the second word of the message from the second element of a pair in the model that starts with the first word it chose."
    "In this case, since it picked hello as the first word, it may pick one between everybody and people as the next word."
    "It then repeats this process by passing the last word it chose as the word to choose in the next iteration."
    "It may be even easier to understand in terms of code than in plain words."
]
/// Splits a sentence into its space-separated words, preserving order.
///
/// Empty entries (from leading, trailing, or repeated spaces, or from an
/// empty line) are dropped. The empty string is used elsewhere in this
/// script as the sentence-start marker, so an empty word token would be
/// indistinguishable from that marker and would corrupt the transition table.
///
/// Returns an empty list for an empty or all-space line.
let words (line: string) =
    line.Split([| ' ' |], System.StringSplitOptions.RemoveEmptyEntries)
    |> List.ofArray
/// Tokenises every line and prefixes each token list with the empty string,
/// which acts as the sentence-start marker for the chain.
/// The result is a lazy sequence with one token list per input line.
let allWords textLines =
    let withStartMarker sentence = "" :: words sentence
    textLines |> Seq.map withStartMarker
/// Builds the transition table of the chain.
///
/// `data` is a sequence of token lists. Every adjacent pair (a, b) inside a
/// list records "b followed a". The result maps each token that has at least
/// one successor to an array of all successors observed for it. Duplicates
/// are kept, so picking a uniformly random array element reproduces the
/// observed frequencies.
let probabilities data =
    let transitions = Seq.collect List.pairwise data

    Seq.groupBy fst transitions
    |> Seq.map (fun (token, pairs) ->
        token, [| for (_, successor) in pairs -> successor |])
    |> Map.ofSeq
/// Walks the chain from `start` until a terminal token is drawn and returns
/// the accumulated output.
///
/// probabilities - transition table: token -> array of possible successors.
/// random        - source of randomness used to pick a successor uniformly.
/// join          - combines the output so far with the next token.
/// isLast        - true when a token terminates the output.
/// start         - seed token; it is looked up for the first draw but never
///                 included in the output (the first drawn token replaces it).
///
/// Fails with "last token not in probability map" when the current token has
/// no entry in the table.
let generate (probabilities: Map<'a, 'a []>) (random: System.Random) join isLast start =
    let rec step soFar current =
        match probabilities |> Map.tryFind current with
        | Some successors ->
            let chosen = successors.[random.Next(0, successors.Length)]
            // `soFar = start` means nothing has been emitted yet, so the
            // chosen token becomes the output instead of being joined onto
            // the seed.
            match isLast chosen, soFar = start with
            | true, true -> chosen
            | true, false -> join soFar chosen
            | false, true -> step chosen chosen
            | false, false -> step (join soFar chosen) chosen
        | None -> failwith "last token not in probability map"

    step start start
// Transition table trained on the sample sentences above.
let probMap = samples |> allWords |> probabilities
// Sentence generator bound to the trained table: takes the start token and
// returns one generated sentence. The System.Random instance is created once
// here and reused by every call.
let generator =
    let joinWithSpace (left: string) (right: string) = left + " " + right
    let endsSentence (token: string) = token.EndsWith(".")
    generate probMap (System.Random ()) joinWithSpace endsSentence
// Print ten generated sentences, each started from the empty start marker.
for _ in 1 .. 10 do
    generator "" |> printfn "%s"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment