zacharyfmarion/minmax_tic_tac_toe.py

## minmax_tic_tac_toe.py
import random
import math

class Game:
  def initial_state(self):
    '''
    Return the initial state of the game
    '''
    raise NotImplementedError

  def action_space(self, s):
    '''
    For any given state returns a list of all possible valid actions
    '''
    raise NotImplementedError

  def terminal(self, s):
    '''
    Returns whether a given state is terminal
    '''
    raise NotImplementedError

  def winner(self, s, p):
    '''
    Returns the winner of a game, or -1 if there is no winner
    '''
    raise NotImplementedError

  def reward(self, s, p):
    '''
    Returns the reward for a given state
    '''
    raise NotImplementedError

  def next_state(self, s, a, p):
    '''
    Given a state, action, and player id, return the state resulting from the
    player making that move
    '''
    raise NotImplementedError

  def to_readable_string(self, s):
    '''
    Returns a pretty-formatted representation of the board
    '''
    return str(s)

  def to_hash(self, s):
    '''
    Returns a hash of the game state, which is necessary for some algorithms
    such as MCTS
    '''
    return hash(str(s))

class TicTacToe(Game):
  def initial_state(self):
    return [-1 for i in range(9)]

  def action_space(self, s):
    return [i for i in range(len(s)) if s[i] == -1]

  def terminal(self, s):
    return self.is_winner(s, 0) or self.is_winner(s, 1) or len(self.action_space(s)) == 0

  def winner(self, s):
    if self.is_winner(s, 0): return 0
    if self.is_winner(s, 1): return 1
    return -1

  def reward(self, s, p):
    if self.is_winner(s, p): return 1
    if self.is_winner(s, 1-p): return -1
    return self.heuristic(s)

  def heuristic(self, s):
    ''' Stubbed for now '''
    return 0

  def next_state(self, s, a, p):
    copy = s.copy()
    copy[a] = p
    return copy

  def to_readable_string(self, s):
    board = ""
    for i, player in enumerate(s):
        end_of_line = (i + 1) % math.sqrt(len(s)) != 0
        row_line = "\n-----------\n" if i != len(s) - 1 else ""
        board += " {} {}".format(self.stringify_player(player), "|" if end_of_line else row_line)
    return board

  def to_hash(self, s):
    return hash(tuple(s))

  def is_winner(self, s, p):
    '''
    Return whether a particular player has won the game. Ideally this would
    be generalized to an nxn board.
    '''
    return ((s[6] == p and s[7] == p and s[8] == p) or
    (s[3] == p and s[4] == p and s[5] == p) or
    (s[0] == p and s[1] == p and s[2] == p) or
    (s[6] == p and s[3] == p and s[0] == p) or
    (s[7] == p and s[4] == p and s[1] == p) or
    (s[8] == p and s[5] == p and s[2] == p) or
    (s[6] == p and s[4] == p and s[2] == p) or
    (s[8] == p and s[4] == p and s[0] == p))

  def stringify_player(self, tile):
    mapping = dict(enumerate(['X', 'O']))
    return mapping.get(tile, ' ')

def minimax(g, s, p):
  actions = g.action_space(s)
  rewards = [min_play(g, g.next_state(s, a, p), 1-p) for a in actions]
  return actions[rewards.index(max(rewards))]

def min_play(g, s, p):
  actions = g.action_space(s)
  if g.terminal(s): return g.reward(s, 1-p)
  return min([max_play(g, g.next_state(s, a, p), 1-p) for a in actions])

def max_play(g, s, p):
  actions = g.action_space(s)
  if g.terminal(s): return g.reward(s, p)
  return max([min_play(g, g.next_state(s, a, p), 1-p) for a in actions])

def main():
  # Inititalize our game, state, and current player
  game = TicTacToe()
  s = game.initial_state()
  p = 0
  print(game.to_readable_string(s), "\n")
  # Play out the full game
  while not game.terminal(s):
    action = minimax(game, s, p)
    s[action] = p
    print(game.to_readable_string(s), "\n")
    p = 1 - p

if __name__ == '__main__':
  main()
	import random
	import math

	class Game:
	def initial_state(self):
	'''
	Return the initial state of the game
	'''
	raise NotImplementedError

	def action_space(self, s):
	'''
	For any given state returns a list of all possible valid actions
	'''
	raise NotImplementedError

	def terminal(self, s):
	'''
	Returns whether a given state is terminal
	'''
	raise NotImplementedError

	def winner(self, s, p):
	'''
	Returns the winner of a game, or -1 if there is no winner
	'''
	raise NotImplementedError

	def reward(self, s, p):
	'''
	Returns the reward for a given state
	'''
	raise NotImplementedError

	def next_state(self, s, a, p):
	'''
	Given a state, action, and player id, return the state resulting from the
	player making that move
	'''
	raise NotImplementedError

	def to_readable_string(self, s):
	'''
	Returns a pretty-formatted representation of the board
	'''
	return str(s)

	def to_hash(self, s):
	'''
	Returns a hash of the game state, which is necessary for some algorithms
	such as MCTS
	'''
	return hash(str(s))

	class TicTacToe(Game):
	def initial_state(self):
	return [-1 for i in range(9)]

	def action_space(self, s):
	return [i for i in range(len(s)) if s[i] == -1]

	def terminal(self, s):
	return self.is_winner(s, 0) or self.is_winner(s, 1) or len(self.action_space(s)) == 0

	def winner(self, s):
	if self.is_winner(s, 0): return 0
	if self.is_winner(s, 1): return 1
	return -1

	def reward(self, s, p):
	if self.is_winner(s, p): return 1
	if self.is_winner(s, 1-p): return -1
	return self.heuristic(s)

	def heuristic(self, s):
	''' Stubbed for now '''
	return 0

	def next_state(self, s, a, p):
	copy = s.copy()
	copy[a] = p
	return copy

	def to_readable_string(self, s):
	board = ""
	for i, player in enumerate(s):
	end_of_line = (i + 1) % math.sqrt(len(s)) != 0
	row_line = "\n-----------\n" if i != len(s) - 1 else ""
	board += " {} {}".format(self.stringify_player(player), "\|" if end_of_line else row_line)
	return board

	def to_hash(self, s):
	return hash(tuple(s))

	def is_winner(self, s, p):
	'''
	Return whether a particular player has won the game. Ideally this would
	be generalized to an nxn board.
	'''
	return ((s[6] == p and s[7] == p and s[8] == p) or
	(s[3] == p and s[4] == p and s[5] == p) or
	(s[0] == p and s[1] == p and s[2] == p) or
	(s[6] == p and s[3] == p and s[0] == p) or
	(s[7] == p and s[4] == p and s[1] == p) or
	(s[8] == p and s[5] == p and s[2] == p) or
	(s[6] == p and s[4] == p and s[2] == p) or
	(s[8] == p and s[4] == p and s[0] == p))

	def stringify_player(self, tile):
	mapping = dict(enumerate(['X', 'O']))
	return mapping.get(tile, ' ')

	def minimax(g, s, p):
	actions = g.action_space(s)
	rewards = [min_play(g, g.next_state(s, a, p), 1-p) for a in actions]
	return actions[rewards.index(max(rewards))]

	def min_play(g, s, p):
	actions = g.action_space(s)
	if g.terminal(s): return g.reward(s, 1-p)
	return min([max_play(g, g.next_state(s, a, p), 1-p) for a in actions])

	def max_play(g, s, p):
	actions = g.action_space(s)
	if g.terminal(s): return g.reward(s, p)
	return max([min_play(g, g.next_state(s, a, p), 1-p) for a in actions])

	def main():
	# Inititalize our game, state, and current player
	game = TicTacToe()
	s = game.initial_state()
	p = 0
	print(game.to_readable_string(s), "\n")
	# Play out the full game
	while not game.terminal(s):
	action = minimax(game, s, p)
	s[action] = p
	print(game.to_readable_string(s), "\n")
	p = 1 - p

	if __name__ == '__main__':
	main()