Created
December 20, 2010 19:45
-
-
Save Ming-Tang/748885 to your computer and use it in GitHub Desktop.
Generates quasi-nonsense words by learning from a sample file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(*
Compilation:
  $ fsc markov.fs
Usage:
  $ [mono] markov.exe
Load a file, then print one generated word every 0.2 seconds:
  $ (echo "init <FILE>.txt"; for i in `seq 1 100000`; do echo $i; sleep 0.2; done; echo exit) | [mono] markov.exe
*)
open System | |
open System.IO | |
open System.Text.RegularExpressions | |
/// Copies row `n` of the two-dimensional array `a` into a new one-dimensional array.
let row (a : 'a[,]) n =
    // Slice out the single row first, then flatten the resulting 1 x m strip.
    let strip = a.[n..n, 0..]
    [| for col in 0 .. Array2D.length2 strip - 1 -> strip.[0, col] |]
/// Returns every run of `n` consecutive elements of `s`, ordered by starting position.
/// The result holds `s.Length + 1 - n` arrays, each of length `n`.
let chains n (s : 'a[]) =
    Array.init (s.Length + 1 - n) (fun start ->
        // Window beginning at `start`: s.[start] .. s.[start + n - 1].
        Array.init n (fun offset -> s.[start + offset]))
// Module-level model state. Every binding below is overwritten by `initialize`;
// the values given here are only placeholders until the first call.

/// Number of words that make up one state (chain length).
let mutable order : int = 1
/// The training text as a sequence of words.
let mutable data : string[] = Array.empty
/// Number of words in `data`.
let mutable length : int = 0
/// Number of distinct states (entries in `lookupTable`).
let mutable states : int = 0
/// Number of distinct words (entries in `wordTable`).
let mutable words : int = 0
/// Distinct chains of word indices; a state's id is its position in this table.
let mutable lookupTable : int[][] = Array.empty
/// Distinct words; a word's id is its position in this table.
let mutable wordTable : string[] = Array.empty
/// `data` with every word replaced by its index in `wordTable`.
let mutable datai : int[] = Array.empty
/// All windows of `order` consecutive entries of `datai`.
let mutable chain : int[][] = Array.zeroCreate 1
/// Transition counts: matrix.[state, word] is how often `word` followed `state`.
let mutable matrix : int[,] = Array2D.zeroCreate 1 1
/// Per-state total of the counts in the matching `matrix` row.
let mutable sums : int[] = Array.zeroCreate 1
/// The current state, as a list of word indices.
let mutable initials : int list = List.empty
/// Returns the position of the state `ms` in `lookupTable`, comparing arrays structurally.
/// `Array.findIndex` raises if no entry of `lookupTable` equals `ms`.
let index (ms : int[]) =
    lookupTable |> Array.findIndex (fun candidate -> ms = candidate)
/// Shared random number generator used for state and successor selection.
let rand = new Random()

/// Picks a successor word for state `id` and returns its index into `wordTable`.
///
/// Each word is chosen with probability proportional to its count in row `id`
/// of `matrix` (`sums.[id]` is the row total). If the state has no recorded
/// successors (row total is zero), a word is chosen uniformly from all
/// `words` so that generation can continue.
let randState id =
    let total = sums.[id]
    if total <= 0 then
        // Dead-end state (e.g. a word that only occurs at the very end of the text).
        rand.Next(words)
    else
        // r is uniform in [0, total); select the first column whose running
        // total exceeds r. Zero-count columns never raise the running total,
        // so they can never be selected.
        let r = rand.Next(total)
        let mutable col = 0
        let mutable running = matrix.[id, 0]
        while running <= r do
            col <- col + 1
            running <- running + matrix.[id, col]
        col
/// Rebuilds the Markov model from the word sequence `dt`, using states of `ord` words.
///
/// On success this overwrites all module-level model state (`order`, `data`,
/// `wordTable`, `datai`, `chain`, `lookupTable`, `matrix`, `sums`, ...) and
/// picks a random starting state into `initials`.
///
/// Raises a failure with the message "Not supported." when `ord` is greater than 1.
/// When `dt` is empty there is nothing to learn from, so a notice is printed
/// and the previously loaded model is left untouched.
let initialize (dt : string[]) (ord : int) =
    if ord > 1 then failwith "Not supported."
    printfn " [Loading file] "
    if dt.Length = 0 then
        // An empty corpus yields zero states; selecting a start state would
        // index past the end of lookupTable, so bail out before changing anything.
        printfn " [No words found, keeping the current model] "
    else
        order <- ord
        data <- dt
        length <- data.Length
        // Set.toArray gives the distinct words in sorted order.
        wordTable <- data |> Set.ofArray |> Set.toArray
        words <- wordTable.Length
        // Hash lookup word -> index instead of a linear scan per word.
        let wordIndex = wordTable |> Array.mapi (fun i w -> w, i) |> dict
        datai <- data |> Array.map (fun w -> wordIndex.[w])
        chain <- chains order datai
        lookupTable <-
            chain
            |> Set.ofArray
            |> Set.toArray
        states <- lookupTable.Length
        matrix <- Array2D.zeroCreate<int> states words
        sums <- Array.zeroCreate<int> states
        printfn " [Generating Markov Chain] "
        // `dict` hashes keys structurally, so int[] states with equal contents match.
        let stateIndex = lookupTable |> Array.mapi (fun i st -> st, i) |> dict
        for i = 0 to length - order - 1 do
            let id = stateIndex.[chain.[i]]
            // The word that follows the chain starting at position i.
            let next = datai.[i + order]
            matrix.[id, next] <- matrix.[id, next] + 1
            sums.[id] <- sums.[id] + 1
        initials <- Array.toList lookupTable.[rand.Next(states)]
// Start with a small built-in corpus so words can be generated before any file is loaded.
initialize (
    "hello world markov chain generator please open a file hello world please open a file hello world or input init filename order to get started".Split(' ')
) order

// Command loop, one command per input line:
//   init <file> [order]  - learn a new model from <file>
//   select               - jump to a random state
//   exit                 - quit
//   anything else        - print the next generated word
let mutable cont = true
while cont do
    try
        match Console.ReadLine() with
        | null ->
            // End of input: stop rather than generating words forever.
            cont <- false
        | line ->
            // The appended " - " guarantees inp.[0] exists for blank lines and
            // gives a bare "init" a placeholder file name.
            let inp = (line + " - ").Split([| ' ' |], StringSplitOptions.RemoveEmptyEntries)
            match inp.[0] with
            | "init" ->
                let fileName = inp.[1]
                let r = new Regex(@"[^a-zA-Z]+")
                // Split every line on non-letters, drop empties, lower-case the rest.
                let tokens =
                    seq {
                        use sr = new StreamReader(fileName)
                        while not sr.EndOfStream do
                            let text = sr.ReadLine()
                            yield! r.Split(text)
                    }
                    |> Seq.filter ((<>) "")
                    |> Seq.map (fun x -> x.ToLower())
                    |> Array.ofSeq
                // Optional third token is the order; anything unparsable
                // (including out-of-range numbers) falls back to 1.
                let ord =
                    if inp.Length >= 3 then
                        match Int32.TryParse(inp.[2]) with
                        | true, value -> value
                        | _ -> 1
                    else 1
                initialize tokens ord
            | "select" ->
                initials <- Array.toList lookupTable.[rand.Next(states)]
            | "exit" ->
                cont <- false
            | _ ->
                // Emit one word and slide the state window forward by it.
                let next = randState (index (Array.ofList initials))
                printf " %s" wordTable.[next]
                initials <- List.tail (initials @ [next])
    with
    | :? FileNotFoundException
    | :? DirectoryNotFoundException ->
        initialize ("input error maybe file does not exist please check filename input error file does not exist".Split(' ')) 1
    | Failure msg ->
        // e.g. "Not supported." for an order above 1: report it and keep the session alive.
        printfn " [%s] " msg
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment