Skip to content

Instantly share code, notes, and snippets.

@Alanaktion
Last active October 17, 2017 03:28
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Alanaktion/d9fb04074defe7fcbb04e5b5dd1490f6 to your computer and use it in GitHub Desktop.
Save Alanaktion/d9fb04074defe7fcbb04e5b5dd1490f6 to your computer and use it in GitHub Desktop.
Generate pseudo-random phrases from Markov chains
#!/usr/bin/env php
<?php
// Command-line entry point: markov <input file> [words] [prefix] [minWords]
// Show usage when no input file is given or help is requested. A strict
// comparison is used so that a file literally named "0" is still accepted.
if (!isset($argv[1]) || $argv[1] === '' || $argv[1] === '-h') {
    exit("Usage: markov <input file> [words] [prefix] [minWords]\n");
}

$text = file_get_contents($argv[1]);
if ($text === false) {
    fwrite(STDERR, "Unable to read input file: {$argv[1]}\n");
    exit(1);
}

// Command-line arguments arrive as strings; validate before casting so a
// typo yields a readable message instead of an uncaught TypeError.
$words = $argv[2] ?? 20;
$minWords = $argv[4] ?? 10;
if (!is_numeric($words) || !is_numeric($minWords)) {
    fwrite(STDERR, "words and minWords must be numeric\n");
    exit(1);
}

$map = map($text);

// An omitted or empty prefix means "pick one at random". An empty prefix can
// never be a key of the map, so treating it as "random" lets callers supply
// minWords without having to choose a prefix.
$prefix = $argv[3] ?? '';
if ($prefix === '') {
    // Only keep phrase-starting words that have at least one successor in
    // the map; a one-word line would otherwise make markov() throw.
    // Duplicates are kept on purpose so frequent starters stay more likely.
    $candidates = array_values(array_filter(
        prefixes($text),
        function ($candidate) use ($map) {
            return array_key_exists($candidate, $map);
        }
    ));
    if ($candidates === []) {
        fwrite(STDERR, "Input contains no phrase with at least two words\n");
        exit(1);
    }
    $prefix = arand($candidates);
}

try {
    echo markov($map, $prefix, (int) $words, (int) $minWords), "\n";
} catch (Exception $e) {
    // markov() throws when a user-supplied prefix is not present in the map.
    fwrite(STDERR, $e->getMessage() . "\n");
    exit(1);
}
/**
 * Generate a sentence by walking a Markov map from a starting word.
 *
 * Starting at $prefix, a random successor of the current word is appended
 * repeatedly. The walk stops when the current word has no successors, when
 * $maxWords words have been collected, or when a word ending in '.', '?' or
 * '!' is reached after at least $minWords words have been collected.
 *
 * @param array  $map      Map of word => list of words that followed it
 * @param string $prefix   Word to start with; must be a key of $map
 * @param int    $maxWords Maximum number of words in result
 * @param int    $minWords Minimum number of words in result before a
 *                         sentence-ending word may stop the walk early
 * @return string Generated words joined by single spaces
 * @throws Exception If $prefix is not a key of $map
 */
function markov(
    array $map, string $prefix, int $maxWords = 20, int $minWords = 10
): string {
    if (!array_key_exists($prefix, $map)) {
        throw new Exception("Prefix word does not exist in map!");
    }
    $word = $prefix;
    $result = [$prefix];
    // Strict "<" so the result never grows beyond $maxWords words.
    while (array_key_exists($word, $map) && count($result) < $maxWords) {
        $word = arand($map[$word]);
        $result[] = $word;
        // substr() copes with an empty word, and replaces the curly-brace
        // string offset syntax that PHP 8.0 removed.
        if (count($result) >= $minWords
            && in_array(substr($word, -1), ['.', '?', '!'], true)
        ) {
            break;
        }
    }
    return implode(' ', $result);
}
/**
 * Generate Markov pair map
 *
 * The text is split into phrases, each phrase into whitespace-separated
 * words, and every word (except the last of its phrase) is mapped to the
 * list of words that directly followed it. Successors are recorded once per
 * occurrence, so repeated pairs appear multiple times in the list.
 *
 * @param string      $text  Text containing one or more phrases
 * @param string|null $split Regular expression to split phrases on, or null
 *                           to treat the whole text as a single phrase
 * @return array Map of word => list of following words
 * @throws InvalidArgumentException If $split is not a valid regular expression
 */
function map(string $text, ?string $split = "/\n/"): array {
    if ($split === null) {
        $phrases = [$text];
    } else {
        $phrases = preg_split($split, $text);
        if ($phrases === false) {
            throw new InvalidArgumentException("Invalid split pattern: $split");
        }
    }
    $map = [];
    foreach ($phrases as $phrase) {
        // PREG_SPLIT_NO_EMPTY drops the empty strings produced by leading or
        // trailing whitespace, so they never become keys or successors, while
        // a legitimate word such as "0" is kept.
        $words = preg_split('/\s+/', $phrase, -1, PREG_SPLIT_NO_EMPTY);
        $last = count($words) - 1;
        // The final word of a phrase has no successor, so stop before it.
        for ($i = 0; $i < $last; $i++) {
            $map[$words[$i]][] = $words[$i + 1];
        }
    }
    return $map;
}
/**
 * Generate array of prefix words
 *
 * Returns the first whitespace-delimited word of every non-blank phrase, in
 * phrase order. Duplicates are kept. Words are delimited by any whitespace,
 * matching the tokenisation used by map().
 *
 * @param string $text  Text containing one or more phrases
 * @param string $split Regular expression to split phrases on
 * @return array List of first words, one per non-blank phrase
 * @throws InvalidArgumentException If $split is not a valid regular expression
 */
function prefixes(string $text, string $split = "/\n/"): array {
    $phrases = preg_split($split, $text);
    if ($phrases === false) {
        throw new InvalidArgumentException("Invalid split pattern: $split");
    }
    $prefixes = [];
    foreach ($phrases as $phrase) {
        // The first run of non-whitespace characters is the phrase's first
        // word; this skips leading whitespace and keeps words such as "0".
        if (preg_match('/\S+/', $phrase, $match) === 1) {
            $prefixes[] = $match[0];
        }
    }
    return $prefixes;
}
/**
 * Get random value from array using mt_rand
 *
 * Works for any array, not only 0-indexed lists: the values are reindexed
 * before one is picked.
 *
 * @param array $array Non-empty array of string values
 * @return string One of the array's values, chosen at random
 * @throws InvalidArgumentException If $array is empty
 */
function arand(array $array): string {
    if ($array === []) {
        // mt_rand(0, -1) would otherwise fail with an unhelpful range error.
        throw new InvalidArgumentException("Cannot pick a random value from an empty array");
    }
    $values = array_values($array);
    return $values[mt_rand(0, count($values) - 1)];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment