Skip to content

Instantly share code, notes, and snippets.

@michaelchadwick
Last active June 12, 2024 17:26
Show Gist options
  • Save michaelchadwick/307b892fafdca39ed27318fd4b394f37 to your computer and use it in GitHub Desktop.
Save michaelchadwick/307b892fafdca39ed27318fd4b394f37 to your computer and use it in GitHub Desktop.
Take array of words and remove any that are anagrams of any other words in array
<?php
// The input file (path given as the first CLI argument) is JSON whose "9" key
// holds a list of 9-letter words; it looks like this:
/*
{
"9": [
"aardvarks",
...
"xenomorph"
]
}
*/
// --- Input handling ---------------------------------------------------------
// The input path is the first CLI argument; an absent or empty argument is
// rejected before any file access is attempted.
$filename = isset($argv[1]) ? $argv[1] : '';
if ($filename === '') {
    echo "Error: No filename provided.\n";
    exit(1);
}

$WORDS_INPUT_JSON_FILE = $filename;
// The output path embeds the input path verbatim under de-anagrammed/
// (relative to the current working directory).
$WORDS_OUTPUT_JSON_FILE = 'de-anagrammed/' . $filename . '.de-anagrammed.json';

// Check readability up front so a bad path yields the script's own error
// message instead of a raw PHP warning from file_get_contents().
$response = (is_file($WORDS_INPUT_JSON_FILE) && is_readable($WORDS_INPUT_JSON_FILE))
    ? file_get_contents($WORDS_INPUT_JSON_FILE)
    : false;
if ($response === false || $response === '') {
    echo "Error: Could not load filename.\n";
    exit(1);
}

// json_decode() returns null on malformed JSON, and the "9" key may be missing
// or hold a non-list value; validate the shape before counting or iterating.
// (With assoc=true the JSON key "9" is exposed as integer key 9.)
$decoded = json_decode($response, true);
if (!is_array($decoded) || !isset($decoded[9]) || !is_array($decoded[9])) {
    echo "Error: Expected JSON of the form {\"9\": [...]} in input file.\n";
    exit(1);
}

// Re-index so the word list is guaranteed to be addressable as 0..n-1.
$words = array_values($decoded[9]);
echo "checking " . count($words) . " words\n";
/**
 * Build the anagram signature of a word: its lower-cased bytes in sorted order.
 *
 * Two words are anagrams of each other exactly when their signatures match.
 *
 * @param string $word Word to normalise.
 * @return string The lower-cased characters of $word, sorted.
 */
function anagramSignature(string $word): string
{
    $letters = str_split(strtolower($word));
    sort($letters, SORT_STRING);

    return implode('', $letters);
}

/**
 * Return the words that have no anagram among the other words of the list.
 *
 * A word is dropped when some *different* string in $words shares its
 * signature. Exact duplicates of a word are not anagrams of each other, so
 * every copy of a duplicated, anagram-free word is kept. Spellings that differ
 * only in letter case are different strings with the same signature and
 * therefore count as anagrams. Input order is preserved.
 *
 * @param string[] $words Candidate words.
 * @return string[] The words of $words without any anagram, in input order.
 */
function findWordsWithoutAnagrams(array $words): array
{
    // First pass: group the distinct spellings seen for every signature.
    $signatures = [];
    $spellingsBySignature = [];
    foreach ($words as $index => $word) {
        $signature = anagramSignature($word);
        $signatures[$index] = $signature;
        // Array keys act as a set, so repeated identical words collapse.
        $spellingsBySignature[$signature][$word] = true;
    }

    // Second pass: keep a word only when its signature has a single spelling.
    $withoutAnagrams = [];
    foreach ($words as $index => $word) {
        if (count($spellingsBySignature[$signatures[$index]]) === 1) {
            $withoutAnagrams[] = $word;
        }
    }

    return $withoutAnagrams;
}

// Both passes are linear in the number of words, so no per-word progress
// ticker is printed. `?? []` only matters if the loading step left $words unset.
$uniqueWords = findWordsWithoutAnagrams($words ?? []);
// The reporting section branches on this flag.
$isUnique = count($uniqueWords) > 0;
/**
 * Write $words to $path as JSON of the form {"9": [...]}.
 *
 * The parent directory of $path is created (recursively) when it does not
 * exist yet, so a missing output folder no longer makes the write fail.
 *
 * @param string   $path  Destination file; overwritten if it already exists.
 * @param string[] $words Words to store under the "9" key.
 * @return bool True when the complete payload was written, false otherwise.
 */
function writeWordsJson(string $path, array $words): bool
{
    $json = json_encode(['9' => $words]);
    if ($json === false) {
        return false;
    }

    $directory = dirname($path);
    // The trailing is_dir() re-check tolerates another process creating the
    // directory between the first check and mkdir().
    if (!is_dir($directory) && !mkdir($directory, 0777, true) && !is_dir($directory)) {
        return false;
    }

    // file_put_contents() opens, writes and closes in one step, so no handle
    // can be left open on a failure path.
    return file_put_contents($path, $json) === strlen($json);
}

// --- Reporting --------------------------------------------------------------
if ($isUnique) {
    echo "-----------------------------------------------------------\n";
    echo "found _" . count($uniqueWords) . "_ word(s) with no anagrams in corpus\n";
    echo "-----------------------------------------------------------\n";
    if (writeWordsJson($WORDS_OUTPUT_JSON_FILE, $uniqueWords)) {
        echo "-> wrote output json file successfully: " . $WORDS_OUTPUT_JSON_FILE . "\n";
    } else {
        // Covers every failure: encoding, directory creation, open and write.
        echo "Error: Could not write to output file.\n";
        exit(1);
    }
} else {
    echo "-----------------------------------------------------------\n";
    echo "all words have a 9-letter anagram somewhere in the corpus!\n";
    echo "-----------------------------------------------------------\n";
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment