Dan-Q/zen-hangman.rb

## zen-hangman.rb
#!/usr/bin/env ruby
# -*- coding: iso-8859-1 -*-
# Inspired by http://datagenetics.com/blog/april12012/index.html
# Comes up with the "hardest words" to guess, in hangman, assuming the player is playing optimally
# (choosing the letter most-likely to appear, given a dictionary of all candidate words and the
# letters guessed so far [because hangman guesses are not independent; they affect the gamestate
# either by saying "this letter DOES NOT feature" or by saying "this letter features in THESE
# positions"])
# Result: a "hardest hangman words" dictionary?

# DEBUG: switch DEBUG_MODE to true to show additional output (basically, a discussion between
# the simulated players!)
DEBUG_MODE = false

# DEBUG: for testing, or for simulating best-play on a particular word, use a cut-down wordlist
#        (be sure to change NUM_LETTERS if you pick a different word length, too!)
#ALL_WORDS = %w{cat dog hen}

# How many letters are in the words we're playing with?
NUM_LETTERS = 6
puts "Working with #{NUM_LETTERS}-letter words. Filtering dictionary..." if DEBUG_MODE

# Preload a list of all possible words (using ||= in case we're pre-setting the words for
# debugging purposes, earlier: note that this may produce a warning!)
# We lowercase everything (fixes e.g. 'kHz'), remove words with special characters (removes e.g.
# 'déjà'), and get rid of any duplicates (no point in playing more games than we have to!)
ALL_WORDS ||= File::read('wordlist.txt').split.reject{|word| word.length != NUM_LETTERS || word !~ /^[a-z]+$/}.collect{|word| word.downcase}.uniq
puts "There are #{ALL_WORDS.length} words to play."

# Given a board (a string of dots with letters in place, e.g. "ca." could be cat or cab or car
# or whatever) and an array of WRONG guesses, returns the best-bet move. In event of tie, returns
# the one that comes up alphabetically first
def best_move_for(board, wrong_guesses, dictionary)
  # Set up a hash of letter frequencies ('a' => 0, 'b' => 0), excluding the known wrong guesses
  # and the letters already found
  letters_already_found = board.chars.to_a.uniq.reject{|l| l == '.'}
  letter_frequencies = Hash[('a'..'z').reject{|l| letters_already_found.include?(l) || wrong_guesses.include?(l)}.map{|l| [l,0]}]
  # For each possible word, give a 'point' to each potential letter
  dictionary.each do |possible_word|
    possible_word.chars.each do |letter|
      letter_frequencies[letter] += 1 if letter_frequencies[letter]
    end
  end
  # The letter with the most points is the winner
  letter_frequencies.max_by{|k,v|v}[0]
end

# Given a target word, a state-of-play board, a guess, and an array of wrong guesses,
# returns an array containing the new board state and the array of wrong guesses (amended if
# the guess just made was wrong)
def make_move(target_word, board, guess, wrong_guesses)
  correct_guess = false
  # Go through each letter of the target word to hunt for the guessed letter
  target_word.length.times do |i|
    if target_word[i] == guess
      # If we find it, update the board accordingly
      correct_guess = true
      board[i] = guess
    end
  end
  # Add the guess to the list of wrong guesses if we didn't find a new letter
  wrong_guesses << guess unless correct_guess
  if DEBUG_MODE
    if correct_guess
      puts " + Correct. New board state: #{board}"
    else
      puts " + Wrong. Bad guesses so far: #{wrong_guesses.join(', ')}"
    end
  end
  # Return the result
  [board, wrong_guesses]
end

# Recursively plays an optimal game of hangman, returing a hash of statistics about the game
# suitable for storing in the 'result' array
def result_for(target_word, board, num_guesses = 0, wrong_guesses = [], dictionary = ALL_WORDS.clone)
  # If we've won, break out with a result
  return { :word => target_word, :num_guesses => num_guesses, :wrong_guesses => wrong_guesses.length } if target_word == board
  # Otherwise, filter the wordlist down and determine the best move
  puts " > Considering my best move..." if DEBUG_MODE
  unless num_guesses == 0 # (shortcut: no point filtering the wordlist if we've made no guesses)
    dictionary.select!{|word| word =~ Regexp::new("^#{board}$")}
  end
  best_move = best_move_for(board, wrong_guesses, dictionary)
  # Make the move, then continue to play
  puts " > I guess '#{best_move}'" if DEBUG_MODE
  board, wrong_guesses = make_move(target_word, board, best_move, wrong_guesses)
  result_for(target_word, board, num_guesses + 1, wrong_guesses, dictionary)
end

# Play a game for every single word in the wordlist
result = ALL_WORDS.collect do |target_word|
  puts "Target word is #{target_word}" if DEBUG_MODE
  # Set up the game with an empty board, expressed as a string e.g. "......", and guess space
  board = '.'*NUM_LETTERS
  # Play the game (recursively making and enumerating 'best moves')
  result_for(target_word, board)
end

# Sort the results (reverse order by number of wrong guesses, then reverse order by number
# of total guesses, then alphabetically by word)
puts "Sorting the results..." if DEBUG_MODE
result.sort_by!{|r| sprintf('%02d%02d%s', 26-r[:wrong_guesses], 26-r[:num_guesses], r[:word])}

# Output the results
word_column_width = (NUM_LETTERS < 4 ? 4 : NUM_LETTERS)
printf("%#{word_column_width}s %9s %7s\n", 'Word', 'Guesses', 'Wrong')
result.each do |r|
  printf("%#{word_column_width}s %9d %7d\n", r[:word], r[:num_guesses], r[:wrong_guesses])
end
	#!/usr/bin/env ruby
	# -- coding: iso-8859-1 --
	# Inspired by http://datagenetics.com/blog/april12012/index.html
	# Comes up with the "hardest words" to guess, in hangman, assuming the player is playing optimally
	# (choosing the letter most-likely to appear, given a dictionary of all candidate words and the
	# letters guessed so far [because hangman guesses are not independent; they affect the gamestate
	# either by saying "this letter DOES NOT feature" or by saying "this letter features in THESE
	# positions"])
	# Result: a "hardest hangman words" dictionary?

	# DEBUG: switch DEBUG_MODE to true to show additional output (basically, a discussion between
	# the simulated players!)
	DEBUG_MODE = false

	# DEBUG: for testing, or for simulating best-play on a particular word, use a cut-down wordlist
	# (be sure to change NUM_LETTERS if you pick a different word length, too!)
	#ALL_WORDS = %w{cat dog hen}

	# How many letters are in the words we're playing with?
	NUM_LETTERS = 6
	puts "Working with #{NUM_LETTERS}-letter words. Filtering dictionary..." if DEBUG_MODE

	# Preload a list of all possible words (using \|\|= in case we're pre-setting the words for
	# debugging purposes, earlier: note that this may produce a warning!)
	# We lowercase everything (fixes e.g. 'kHz'), remove words with special characters (removes e.g.
	# 'déjà'), and get rid of any duplicates (no point in playing more games than we have to!)
	ALL_WORDS \|\|= File::read('wordlist.txt').split.reject{\|word\| word.length != NUM_LETTERS \|\| word !~ /^[a-z]+$/}.collect{\|word\| word.downcase}.uniq
	puts "There are #{ALL_WORDS.length} words to play."

	# Given a board (a string of dots with letters in place, e.g. "ca." could be cat or cab or car
	# or whatever) and an array of WRONG guesses, returns the best-bet move. In event of tie, returns
	# the one that comes up alphabetically first
	def best_move_for(board, wrong_guesses, dictionary)
	# Set up a hash of letter frequencies ('a' => 0, 'b' => 0), excluding the known wrong guesses
	# and the letters already found
	letters_already_found = board.chars.to_a.uniq.reject{\|l\| l == '.'}
	letter_frequencies = Hash[('a'..'z').reject{\|l\| letters_already_found.include?(l) \|\| wrong_guesses.include?(l)}.map{\|l\| [l,0]}]
	# For each possible word, give a 'point' to each potential letter
	dictionary.each do \|possible_word\|
	possible_word.chars.each do \|letter\|
	letter_frequencies[letter] += 1 if letter_frequencies[letter]
	end
	end
	# The letter with the most points is the winner
	letter_frequencies.max_by{\|k,v\|v}[0]
	end

	# Given a target word, a state-of-play board, a guess, and an array of wrong guesses,
	# returns an array containing the new board state and the array of wrong guesses (amended if
	# the guess just made was wrong)
	def make_move(target_word, board, guess, wrong_guesses)
	correct_guess = false
	# Go through each letter of the target word to hunt for the guessed letter
	target_word.length.times do \|i\|
	if target_word[i] == guess
	# If we find it, update the board accordingly
	correct_guess = true
	board[i] = guess
	end
	end
	# Add the guess to the list of wrong guesses if we didn't find a new letter
	wrong_guesses << guess unless correct_guess
	if DEBUG_MODE
	if correct_guess
	puts " + Correct. New board state: #{board}"
	else
	puts " + Wrong. Bad guesses so far: #{wrong_guesses.join(', ')}"
	end
	end
	# Return the result
	[board, wrong_guesses]
	end

	# Recursively plays an optimal game of hangman, returing a hash of statistics about the game
	# suitable for storing in the 'result' array
	def result_for(target_word, board, num_guesses = 0, wrong_guesses = [], dictionary = ALL_WORDS.clone)
	# If we've won, break out with a result
	return { :word => target_word, :num_guesses => num_guesses, :wrong_guesses => wrong_guesses.length } if target_word == board
	# Otherwise, filter the wordlist down and determine the best move
	puts " > Considering my best move..." if DEBUG_MODE
	unless num_guesses == 0 # (shortcut: no point filtering the wordlist if we've made no guesses)
	dictionary.select!{\|word\| word =~ Regexp::new("^#{board}$")}
	end
	best_move = best_move_for(board, wrong_guesses, dictionary)
	# Make the move, then continue to play
	puts " > I guess '#{best_move}'" if DEBUG_MODE
	board, wrong_guesses = make_move(target_word, board, best_move, wrong_guesses)
	result_for(target_word, board, num_guesses + 1, wrong_guesses, dictionary)
	end

	# Play a game for every single word in the wordlist
	result = ALL_WORDS.collect do \|target_word\|
	puts "Target word is #{target_word}" if DEBUG_MODE
	# Set up the game with an empty board, expressed as a string e.g. "......", and guess space
	board = '.'*NUM_LETTERS
	# Play the game (recursively making and enumerating 'best moves')
	result_for(target_word, board)
	end

	# Sort the results (reverse order by number of wrong guesses, then reverse order by number
	# of total guesses, then alphabetically by word)
	puts "Sorting the results..." if DEBUG_MODE
	result.sort_by!{\|r\| sprintf('%02d%02d%s', 26-r[:wrong_guesses], 26-r[:num_guesses], r[:word])}

	# Output the results
	word_column_width = (NUM_LETTERS < 4 ? 4 : NUM_LETTERS)
	printf("%#{word_column_width}s %9s %7s\n", 'Word', 'Guesses', 'Wrong')
	result.each do \|r\|
	printf("%#{word_column_width}s %9d %7d\n", r[:word], r[:num_guesses], r[:wrong_guesses])
	end