Created
December 11, 2013 13:21
-
-
Save Dan-Q/7910309 to your computer and use it in GitHub Desktop.
What's the hardest-to-guess word in Hangman, for any given number of letters? Give this program a wordlist.txt and a number of letters, and it'll tell you, by repeatedly playing an optimised game against itself and then sorting the results.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# -*- coding: iso-8859-1 -*- | |
# Inspired by http://datagenetics.com/blog/april12012/index.html | |
# Comes up with the "hardest words" to guess, in hangman, assuming the player is playing optimally | |
# (choosing the letter most-likely to appear, given a dictionary of all candidate words and the | |
# letters guessed so far [because hangman guesses are not independent; they affect the gamestate | |
# either by saying "this letter DOES NOT feature" or by saying "this letter features in THESE | |
# positions"]) | |
# Result: a "hardest hangman words" dictionary? | |
# DEBUG: switch DEBUG_MODE to true to show additional output (basically, a discussion between | |
# the simulated players!) | |
DEBUG_MODE = false | |
# DEBUG: for testing, or for simulating best-play on a particular word, use a cut-down wordlist | |
# (be sure to change NUM_LETTERS if you pick a different word length, too!) | |
#ALL_WORDS = %w{cat dog hen} | |
# How many letters are in the words we're playing with? | |
NUM_LETTERS = 6 | |
puts "Working with #{NUM_LETTERS}-letter words. Filtering dictionary..." if DEBUG_MODE | |
# Preload a list of all possible words (using ||= in case we're pre-setting the words for | |
# debugging purposes, earlier: note that this may produce a warning!) | |
# We lowercase everything (fixes e.g. 'kHz'), remove words with special characters (removes e.g. | |
# 'déjà'), and get rid of any duplicates (no point in playing more games than we have to!) | |
ALL_WORDS ||= File::read('wordlist.txt').split.reject{|word| word.length != NUM_LETTERS || word !~ /^[a-z]+$/}.collect{|word| word.downcase}.uniq | |
puts "There are #{ALL_WORDS.length} words to play." | |
# Given a board (a string of dots with letters in place, e.g. "ca." could be cat or cab or car | |
# or whatever) and an array of WRONG guesses, returns the best-bet move. In event of tie, returns | |
# the one that comes up alphabetically first | |
def best_move_for(board, wrong_guesses, dictionary) | |
# Set up a hash of letter frequencies ('a' => 0, 'b' => 0), excluding the known wrong guesses | |
# and the letters already found | |
letters_already_found = board.chars.to_a.uniq.reject{|l| l == '.'} | |
letter_frequencies = Hash[('a'..'z').reject{|l| letters_already_found.include?(l) || wrong_guesses.include?(l)}.map{|l| [l,0]}] | |
# For each possible word, give a 'point' to each potential letter | |
dictionary.each do |possible_word| | |
possible_word.chars.each do |letter| | |
letter_frequencies[letter] += 1 if letter_frequencies[letter] | |
end | |
end | |
# The letter with the most points is the winner | |
letter_frequencies.max_by{|k,v|v}[0] | |
end | |
# Given a target word, a state-of-play board, a guess, and an array of wrong guesses, | |
# returns an array containing the new board state and the array of wrong guesses (amended if | |
# the guess just made was wrong) | |
def make_move(target_word, board, guess, wrong_guesses) | |
correct_guess = false | |
# Go through each letter of the target word to hunt for the guessed letter | |
target_word.length.times do |i| | |
if target_word[i] == guess | |
# If we find it, update the board accordingly | |
correct_guess = true | |
board[i] = guess | |
end | |
end | |
# Add the guess to the list of wrong guesses if we didn't find a new letter | |
wrong_guesses << guess unless correct_guess | |
if DEBUG_MODE | |
if correct_guess | |
puts " + Correct. New board state: #{board}" | |
else | |
puts " + Wrong. Bad guesses so far: #{wrong_guesses.join(', ')}" | |
end | |
end | |
# Return the result | |
[board, wrong_guesses] | |
end | |
# Recursively plays an optimal game of hangman, returing a hash of statistics about the game | |
# suitable for storing in the 'result' array | |
def result_for(target_word, board, num_guesses = 0, wrong_guesses = [], dictionary = ALL_WORDS.clone) | |
# If we've won, break out with a result | |
return { :word => target_word, :num_guesses => num_guesses, :wrong_guesses => wrong_guesses.length } if target_word == board | |
# Otherwise, filter the wordlist down and determine the best move | |
puts " > Considering my best move..." if DEBUG_MODE | |
unless num_guesses == 0 # (shortcut: no point filtering the wordlist if we've made no guesses) | |
dictionary.select!{|word| word =~ Regexp::new("^#{board}$")} | |
end | |
best_move = best_move_for(board, wrong_guesses, dictionary) | |
# Make the move, then continue to play | |
puts " > I guess '#{best_move}'" if DEBUG_MODE | |
board, wrong_guesses = make_move(target_word, board, best_move, wrong_guesses) | |
result_for(target_word, board, num_guesses + 1, wrong_guesses, dictionary) | |
end | |
# Play a game for every single word in the wordlist | |
result = ALL_WORDS.collect do |target_word| | |
puts "Target word is #{target_word}" if DEBUG_MODE | |
# Set up the game with an empty board, expressed as a string e.g. "......", and guess space | |
board = '.'*NUM_LETTERS | |
# Play the game (recursively making and enumerating 'best moves') | |
result_for(target_word, board) | |
end | |
# Sort the results (reverse order by number of wrong guesses, then reverse order by number | |
# of total guesses, then alphabetically by word) | |
puts "Sorting the results..." if DEBUG_MODE | |
result.sort_by!{|r| sprintf('%02d%02d%s', 26-r[:wrong_guesses], 26-r[:num_guesses], r[:word])} | |
# Output the results | |
word_column_width = (NUM_LETTERS < 4 ? 4 : NUM_LETTERS) | |
printf("%#{word_column_width}s %9s %7s\n", 'Word', 'Guesses', 'Wrong') | |
result.each do |r| | |
printf("%#{word_column_width}s %9d %7d\n", r[:word], r[:num_guesses], r[:wrong_guesses]) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment