Last active
July 25, 2018 04:31
-
-
Save zacharyfmarion/28e3c7e9cdc64cff1a19f18d2a66dc37 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import math | |
class Game: | |
def initial_state(self): | |
''' | |
Return the initial state of the game | |
''' | |
raise NotImplementedError | |
def action_space(self, s): | |
''' | |
For any given state returns a list of all possible valid actions | |
''' | |
raise NotImplementedError | |
def terminal(self, s): | |
''' | |
Returns whether a given state is terminal | |
''' | |
raise NotImplementedError | |
def winner(self, s, p): | |
''' | |
Returns the winner of a game, or -1 if there is no winner | |
''' | |
raise NotImplementedError | |
def reward(self, s, p): | |
''' | |
Returns the reward for a given state | |
''' | |
raise NotImplementedError | |
def next_state(self, s, a, p): | |
''' | |
Given a state, action, and player id, return the state resulting from the | |
player making that move | |
''' | |
raise NotImplementedError | |
def to_readable_string(self, s): | |
''' | |
Returns a pretty-formatted representation of the board | |
''' | |
return str(s) | |
def to_hash(self, s): | |
''' | |
Returns a hash of the game state, which is necessary for some algorithms | |
such as MCTS | |
''' | |
return hash(str(s)) | |
class TicTacToe(Game): | |
def initial_state(self): | |
return [-1 for i in range(9)] | |
def action_space(self, s): | |
return [i for i in range(len(s)) if s[i] == -1] | |
def terminal(self, s): | |
return self.is_winner(s, 0) or self.is_winner(s, 1) or len(self.action_space(s)) == 0 | |
def winner(self, s): | |
if self.is_winner(s, 0): return 0 | |
if self.is_winner(s, 1): return 1 | |
return -1 | |
def reward(self, s, p): | |
if self.is_winner(s, p): return 1 | |
if self.is_winner(s, 1-p): return -1 | |
return self.heuristic(s) | |
def heuristic(self, s): | |
''' Stubbed for now ''' | |
return 0 | |
def next_state(self, s, a, p): | |
copy = s.copy() | |
copy[a] = p | |
return copy | |
def to_readable_string(self, s): | |
board = "" | |
for i, player in enumerate(s): | |
end_of_line = (i + 1) % math.sqrt(len(s)) != 0 | |
row_line = "\n-----------\n" if i != len(s) - 1 else "" | |
board += " {} {}".format(self.stringify_player(player), "|" if end_of_line else row_line) | |
return board | |
def to_hash(self, s): | |
return hash(tuple(s)) | |
def is_winner(self, s, p): | |
''' | |
Return whether a particular player has won the game. Ideally this would | |
be generalized to an nxn board. | |
''' | |
return ((s[6] == p and s[7] == p and s[8] == p) or | |
(s[3] == p and s[4] == p and s[5] == p) or | |
(s[0] == p and s[1] == p and s[2] == p) or | |
(s[6] == p and s[3] == p and s[0] == p) or | |
(s[7] == p and s[4] == p and s[1] == p) or | |
(s[8] == p and s[5] == p and s[2] == p) or | |
(s[6] == p and s[4] == p and s[2] == p) or | |
(s[8] == p and s[4] == p and s[0] == p)) | |
def stringify_player(self, tile): | |
mapping = dict(enumerate(['X', 'O'])) | |
return mapping.get(tile, ' ') | |
def minimax(g, s, p): | |
actions = g.action_space(s) | |
rewards = [min_play(g, g.next_state(s, a, p), 1-p) for a in actions] | |
return actions[rewards.index(max(rewards))] | |
def min_play(g, s, p): | |
actions = g.action_space(s) | |
if g.terminal(s): return g.reward(s, 1-p) | |
return min([max_play(g, g.next_state(s, a, p), 1-p) for a in actions]) | |
def max_play(g, s, p): | |
actions = g.action_space(s) | |
if g.terminal(s): return g.reward(s, p) | |
return max([min_play(g, g.next_state(s, a, p), 1-p) for a in actions]) | |
def main(): | |
# Inititalize our game, state, and current player | |
game = TicTacToe() | |
s = game.initial_state() | |
p = 0 | |
print(game.to_readable_string(s), "\n") | |
# Play out the full game | |
while not game.terminal(s): | |
action = minimax(game, s, p) | |
s[action] = p | |
print(game.to_readable_string(s), "\n") | |
p = 1 - p | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment