Created
January 15, 2017 12:02
-
-
Save pixelgruff/ada609c8cfce4d3fea353bea39906315 to your computer and use it in GitHub Desktop.
Word generating to the moon and back
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package word; | |
import com.google.common.collect.ImmutableMultiset; | |
import com.google.common.collect.Lists; | |
import com.google.common.collect.Multiset; | |
import org.apache.commons.cli.*; | |
import org.apache.commons.lang3.StringUtils; | |
import org.apache.commons.lang3.Validate; | |
import java.io.IOException; | |
import java.net.URISyntaxException; | |
import java.net.URL; | |
import java.nio.file.Files; | |
import java.nio.file.Paths; | |
import java.util.List; | |
import java.util.Set; | |
import java.util.stream.Collectors; | |
import java.util.stream.Stream; | |
/** | |
* Generate some words | |
*/ | |
public class WordGenerator | |
{ | |
// Command-line arguments | |
private static final String ALPHABET_OPTION_NAME = "alphabet"; | |
private static final String CHARACTER_LIMIT_OPTION_NAME = "limit"; | |
// Resource locations | |
private static final String DICTIONARY = "words.txt"; | |
public static void main(final String[] args) | |
{ | |
// Parse command-line arguments | |
final Options options = new Options() | |
.addOption(characterListOption()) | |
.addOption(characterLimitOption()); | |
final CommandLineParser commandLineParser = new DefaultParser(); | |
final HelpFormatter helpFormatter = new HelpFormatter(); | |
final CommandLine commandLine; | |
try { | |
commandLine = commandLineParser.parse(options, args); | |
// Do some weird crazy casting nonsense, eugh | |
final String alphabetString = (String) commandLine.getParsedOptionValue(ALPHABET_OPTION_NAME); | |
final List<Character> alphabet = Lists.charactersOf(alphabetString); | |
final long characterLimit = (Long) commandLine.getParsedOptionValue(CHARACTER_LIMIT_OPTION_NAME); | |
// Verify the input isn't silly | |
Validate.notEmpty(alphabet, "Cannot match any words with a blank alphabet!"); | |
Validate.isTrue(characterLimit > 0, "Cannot match any words with a non-positive word length!"); | |
// Load the dictionary of English words | |
final Set<String> dictionary = loadDictionary(); | |
System.out.printf("Finding length <= %d words from a %d-character alphabet and a %d-length dictionary.\n", | |
characterLimit, alphabet.size(), dictionary.size()); | |
// Find all matching words | |
final Set<String> matchingWords = findAllMatchingWords(dictionary, alphabet, characterLimit); | |
System.out.printf("Found %d matching words: %s\n", matchingWords.size(), matchingWords); | |
} catch (Exception e) { | |
System.err.println("Failed to parse command line!"); | |
helpFormatter.printHelp("word-generator", options); | |
e.printStackTrace(); | |
throw new RuntimeException(e); | |
} | |
} | |
private static Set<String> findAllMatchingWords(final Set<String> dictionary, final List<Character> alphabet, | |
final long characterLimit) | |
{ | |
// Store all characters in the provided alphabet, along with the number of times they appear in the alphabet | |
final Multiset<Character> characterBank = ImmutableMultiset.copyOf(alphabet); | |
// Filter and return all matching words | |
return dictionary.stream() | |
// Discard any words longer than the size limit | |
.filter(word -> word.length() <= characterLimit) | |
// Discard any words that use any character not in the alphabet | |
.filter(word -> Lists.charactersOf(word).stream() | |
.allMatch(characterBank::contains)) | |
// Discard any words that use any character more often than provided in the alphabet | |
.filter(word -> { | |
// Identify how many times each character appears in this word | |
final Multiset<Character> charactersInWord = ImmutableMultiset.copyOf(Lists.charactersOf(word)); | |
// Confirm no character appears more times in this word than in the alphabet | |
for (final Multiset.Entry<Character> entry : charactersInWord.entrySet()) { | |
final Character character = entry.getElement(); | |
final int wordCount = entry.getCount(); | |
final int alphabetCount = characterBank.count(character); | |
if (alphabetCount < wordCount) { | |
return false; | |
} | |
} | |
return true; | |
}).collect(Collectors.toSet()); | |
} | |
private static Set<String> loadDictionary() | |
{ | |
final URL dictionaryUrl = ClassLoader.getSystemResource(DICTIONARY); | |
try (Stream<String> stream = Files.lines(Paths.get(dictionaryUrl.toURI()))) { | |
return stream | |
// Clean up any newlines that try to sneak in as words | |
.filter(StringUtils::isNotBlank) | |
.collect(Collectors.toSet()); | |
} catch (IOException | URISyntaxException e) { | |
System.err.println("Failed to load dictionary file!"); | |
e.printStackTrace(); | |
throw new RuntimeException(e); | |
} | |
} | |
private static Option characterListOption() | |
{ | |
return Option.builder("a") | |
.hasArg() | |
.longOpt(ALPHABET_OPTION_NAME) | |
.desc("Non-unique alphabet of characters available for word matching.") | |
.required() | |
.type(String.class) | |
.build(); | |
} | |
private static Option characterLimitOption() | |
{ | |
return Option.builder("l") | |
.hasArg() | |
.longOpt(CHARACTER_LIMIT_OPTION_NAME) | |
.desc("Maximum length of word available for matching.") | |
.required() | |
.type(Number.class) | |
.build(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment