Last active
October 17, 2017 03:28
-
-
Save Alanaktion/d9fb04074defe7fcbb04e5b5dd1490f6 to your computer and use it in GitHub Desktop.
Generate pseudo-random phrases from Markov chains
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env php | |
<?php | |
// Command-line entry point: build a Markov map from the input file and print
// one generated phrase.
//
// Arguments: <input file> [words] [prefix] [minWords]

// Show usage when no input file is given or help is requested. A strict
// emptiness check is used so that a file literally named "0" is accepted.
if (!isset($argv[1]) || $argv[1] === '' || $argv[1] === '-h') {
    exit("Usage: markov <input file> [words] [prefix] [minWords]\n");
}

$text = file_get_contents($argv[1]);
if ($text === false) {
    fwrite(STDERR, "Unable to read input file: {$argv[1]}\n");
    exit(1);
}

$map = map($text);
$words = $argv[2] ?? 20;
$minWords = $argv[4] ?? 10;

if (isset($argv[3])) {
    $prefix = $argv[3];
} else {
    // Only phrase-leading words that actually have a successor can start a
    // chain; a one-word line yields a prefix that is not a key of the map.
    $candidates = array_values(array_filter(
        prefixes($text),
        static function ($candidate) use ($map) {
            return array_key_exists($candidate, $map);
        }
    ));
    if (!$candidates) {
        fwrite(STDERR, "Input file contains no phrase of two or more words.\n");
        exit(1);
    }
    $prefix = arand($candidates);
}

try {
    echo markov($map, $prefix, $words, $minWords), "\n";
} catch (Exception $e) {
    // Raised when a user-supplied prefix is not present in the map.
    fwrite(STDERR, $e->getMessage() . "\n");
    exit(1);
}
/**
 * Generate a sentence by walking a Markov map.
 *
 * Starting from $prefix, a random successor of the current word is appended
 * repeatedly until one of the following happens:
 *  - the current word has no entry in $map,
 *  - the result holds $maxWords words, or
 *  - the word just appended ends in '.', '?' or '!' and the result already
 *    holds at least $minWords words.
 *
 * The result can be shorter than $minWords when the chain dead-ends.
 *
 * @param array $map Map of word => list of words seen directly after it
 * @param string $prefix Word to start with; must be a key of $map
 * @param int $maxWords Maximum number of words in result
 * @param int $minWords Minimum number of words in result before a
 *                      sentence-ending word stops generation
 * @return string Generated words joined by single spaces
 * @throws Exception If $prefix is not a key of $map
 */
function markov(
    array $map, string $prefix, int $maxWords = 20, int $minWords = 10
): string {
    if (!array_key_exists($prefix, $map)) {
        throw new Exception("Prefix word does not exist in map!");
    }
    $word = $prefix;
    $result = [$prefix];
    // Strict "<" keeps the result at no more than $maxWords words (the prefix
    // itself counts as the first word).
    while (array_key_exists($word, $map) && count($result) < $maxWords) {
        $word = arand($map[$word]);
        $result[] = $word;
        // substr() replaces the removed $word{...} offset syntax and is safe
        // for an empty word; the explicit check keeps '' from matching.
        if ($word !== ''
            && in_array(substr($word, -1), ['.', '?', '!'], true)
            && count($result) >= $minWords
        ) {
            break;
        }
    }
    return implode(' ', $result);
}
/**
 * Generate Markov pair map.
 *
 * The text is split into phrases, each phrase is split into words on runs of
 * whitespace, and every word except the last of its phrase is mapped to the
 * list of words that directly follow it (duplicates are kept, so more
 * frequent successors are proportionally more likely to be picked).
 * Chains never cross a phrase boundary.
 *
 * Note that PHP stores numeric-string keys such as "0" as integer keys.
 *
 * @param string $text Text containing one or more phrases
 * @param string|null $split Regular expression to split phrases on, or null
 *                           to treat the whole text as a single phrase
 * @return array Map of word => list of following words
 * @throws InvalidArgumentException If $split is not a valid regular expression
 */
function map(string $text, ?string $split = "/\n/"): array {
    if ($split === null) {
        $phrases = [$text];
    } else {
        $phrases = preg_split($split, $text);
        if ($phrases === false) {
            throw new InvalidArgumentException("Invalid phrase split pattern: {$split}");
        }
    }
    $map = [];
    foreach ($phrases as $phrase) {
        // PREG_SPLIT_NO_EMPTY drops the empty tokens that leading or trailing
        // whitespace would otherwise produce, so '' is never recorded as a
        // word or as a successor, and a literal "0" is kept.
        $words = preg_split('/\s+/', $phrase, -1, PREG_SPLIT_NO_EMPTY);
        $last = count($words) - 1;
        for ($i = 0; $i < $last; $i++) {
            $map[$words[$i]][] = $words[$i + 1];
        }
    }
    return $map;
}
/**
 * Generate array of prefix words.
 *
 * Returns the first whitespace-delimited word of every non-blank phrase, in
 * phrase order and with duplicates kept. Words are delimited by any
 * whitespace, matching the tokenisation used when building the Markov map,
 * so indented or tab-separated phrases yield the same word the map uses.
 *
 * @param string $text Text containing one or more phrases
 * @param string|null $split Regular expression to split phrases on, or null
 *                           to treat the whole text as a single phrase
 * @return array List of phrase-leading words
 * @throws InvalidArgumentException If $split is not a valid regular expression
 */
function prefixes(string $text, ?string $split = "/\n/"): array {
    if ($split === null) {
        $phrases = [$text];
    } else {
        $phrases = preg_split($split, $text);
        if ($phrases === false) {
            throw new InvalidArgumentException("Invalid phrase split pattern: {$split}");
        }
    }
    $prefixes = [];
    foreach ($phrases as $phrase) {
        // First run of non-whitespace characters; blank phrases do not match.
        // Testing the match count rather than truthiness keeps the word "0".
        if (preg_match('/\S+/', $phrase, $match) === 1) {
            $prefixes[] = $match[0];
        }
    }
    return $prefixes;
}
/**
 * Get random value from array using mt_rand.
 *
 * The array is expected to be a list (consecutive integer keys starting at
 * 0), because the element is chosen by a random index.
 *
 * @param array $array Non-empty list of values
 * @return string The randomly chosen element
 * @throws InvalidArgumentException If $array is empty
 */
function arand(array $array): string {
    $count = count($array);
    // mt_rand(0, -1) is an error, so reject empty input explicitly.
    if ($count === 0) {
        throw new InvalidArgumentException("Cannot pick a random value from an empty array.");
    }
    return $array[mt_rand(0, $count - 1)];
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment