Skip to content

Instantly share code, notes, and snippets.

@zacharyfmarion
Last active July 25, 2018 04:31
Show Gist options
  • Save zacharyfmarion/28e3c7e9cdc64cff1a19f18d2a66dc37 to your computer and use it in GitHub Desktop.
Save zacharyfmarion/28e3c7e9cdc64cff1a19f18d2a66dc37 to your computer and use it in GitHub Desktop.
import random
import math
class Game:
def initial_state(self):
'''
Return the initial state of the game
'''
raise NotImplementedError
def action_space(self, s):
'''
For any given state returns a list of all possible valid actions
'''
raise NotImplementedError
def terminal(self, s):
'''
Returns whether a given state is terminal
'''
raise NotImplementedError
def winner(self, s, p):
'''
Returns the winner of a game, or -1 if there is no winner
'''
raise NotImplementedError
def reward(self, s, p):
'''
Returns the reward for a given state
'''
raise NotImplementedError
def next_state(self, s, a, p):
'''
Given a state, action, and player id, return the state resulting from the
player making that move
'''
raise NotImplementedError
def to_readable_string(self, s):
'''
Returns a pretty-formatted representation of the board
'''
return str(s)
def to_hash(self, s):
'''
Returns a hash of the game state, which is necessary for some algorithms
such as MCTS
'''
return hash(str(s))
class TicTacToe(Game):
def initial_state(self):
return [-1 for i in range(9)]
def action_space(self, s):
return [i for i in range(len(s)) if s[i] == -1]
def terminal(self, s):
return self.is_winner(s, 0) or self.is_winner(s, 1) or len(self.action_space(s)) == 0
def winner(self, s):
if self.is_winner(s, 0): return 0
if self.is_winner(s, 1): return 1
return -1
def reward(self, s, p):
if self.is_winner(s, p): return 1
if self.is_winner(s, 1-p): return -1
return self.heuristic(s)
def heuristic(self, s):
''' Stubbed for now '''
return 0
def next_state(self, s, a, p):
copy = s.copy()
copy[a] = p
return copy
def to_readable_string(self, s):
board = ""
for i, player in enumerate(s):
end_of_line = (i + 1) % math.sqrt(len(s)) != 0
row_line = "\n-----------\n" if i != len(s) - 1 else ""
board += " {} {}".format(self.stringify_player(player), "|" if end_of_line else row_line)
return board
def to_hash(self, s):
return hash(tuple(s))
def is_winner(self, s, p):
'''
Return whether a particular player has won the game. Ideally this would
be generalized to an nxn board.
'''
return ((s[6] == p and s[7] == p and s[8] == p) or
(s[3] == p and s[4] == p and s[5] == p) or
(s[0] == p and s[1] == p and s[2] == p) or
(s[6] == p and s[3] == p and s[0] == p) or
(s[7] == p and s[4] == p and s[1] == p) or
(s[8] == p and s[5] == p and s[2] == p) or
(s[6] == p and s[4] == p and s[2] == p) or
(s[8] == p and s[4] == p and s[0] == p))
def stringify_player(self, tile):
mapping = dict(enumerate(['X', 'O']))
return mapping.get(tile, ' ')
def minimax(g, s, p):
actions = g.action_space(s)
rewards = [min_play(g, g.next_state(s, a, p), 1-p) for a in actions]
return actions[rewards.index(max(rewards))]
def min_play(g, s, p):
actions = g.action_space(s)
if g.terminal(s): return g.reward(s, 1-p)
return min([max_play(g, g.next_state(s, a, p), 1-p) for a in actions])
def max_play(g, s, p):
actions = g.action_space(s)
if g.terminal(s): return g.reward(s, p)
return max([min_play(g, g.next_state(s, a, p), 1-p) for a in actions])
def main():
# Inititalize our game, state, and current player
game = TicTacToe()
s = game.initial_state()
p = 0
print(game.to_readable_string(s), "\n")
# Play out the full game
while not game.terminal(s):
action = minimax(game, s, p)
s[action] = p
print(game.to_readable_string(s), "\n")
p = 1 - p
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment