Skip to content

Instantly share code, notes, and snippets.

@codefever
Created October 7, 2019 09:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codefever/ff9916d4b66cce5c165898892064f3d0 to your computer and use it in GitHub Desktop.
Save codefever/ff9916d4b66cce5c165898892064f3d0 to your computer and use it in GitHub Desktop.
Tic-Tac-Toe Game Play with MCTS
#!/usr/bin/env python
import numpy as np
# from github.com/int8/monte-carlo-tree-search.git
from mctspy.tree.nodes import TwoPlayersGameMonteCarloTreeSearchNode
from mctspy.tree.search import MonteCarloTreeSearch
from mctspy.games.examples.tictactoe import TicTacToeGameState, TicTacToeMove
# Search budget handed to MonteCarloTreeSearch.best_action on every AI turn.
SIMULATIONS = 1600

# Empty 3x3 board: 0 marks a free cell, 1 / -1 mark the two players' pieces
# (see coded() for how these values are rendered).
initial_board = np.zeros(shape=(3, 3))

# Player 1 moves first; the other side is -1.
initial_state = TicTacToeGameState(state=initial_board, next_to_move=1)

# Tree node wrapping the starting position; the script reads its board
# when printing the initial position.
current = TwoPlayersGameMonteCarloTreeSearchNode(state=initial_state)
def coded(c):
    """Return the display character for a single board cell value.

    1 is drawn as 'O', -1 as 'X', and any other value as a blank.
    """
    if c == 1:
        return 'O'
    if c == -1:
        return 'X'
    return ' '
def print_board(board):
    """Print the board one row per line, each row framed by '|' characters.

    `board` is iterated row by row and every cell is rendered with coded().
    """
    for cells in board:
        rendered = ''.join(map(coded, cells))
        print('|' + rendered + '|')
class Player(object):
    """Computer player that chooses its moves with Monte Carlo tree search.

    The player owns a search-tree node (``self.node``) whose state mirrors
    the current game position; the node is advanced after every move made
    by either side.
    """

    def __init__(self, index, state):
        # index is the side this player moves as; my_action() checks it
        # against the state's next_to_move.
        self.move_index = index
        self.node = TwoPlayersGameMonteCarloTreeSearchNode(state)

    @property
    def state(self):
        """Game state held by the player's current tree node."""
        return self.node.state

    @staticmethod
    def _same_position(a, b):
        """Truthy when both states have equal boards and the same side to move."""
        return np.all(a.board == b.board) and a.next_to_move == b.next_to_move

    def my_action(self):
        """Run a search from the current node and adopt the node it returns."""
        assert self.move_index == self.node.state.next_to_move
        search = MonteCarloTreeSearch(self.node)
        self.node = search.best_action(SIMULATIONS)

    def other_action(self, state):
        """Advance the tree to the child matching the opponent's new `state`.

        The current node is expanded until it reports being fully expanded,
        then its children are scanned for one whose state equals `state`.
        The matching child is detached from its parent and becomes the
        current node.

        Raises:
            ValueError: if no child of the current node matches `state`.
        """
        root = self.node
        while not root.is_fully_expanded():
            root.expand()

        for child in root.children:
            if self._same_position(child.state, state):
                # Detach the subtree so it becomes the new root.
                child.parent = None
                self.node = child
                return

        raise ValueError('cannot reach state')
class RealPlayer(object):
    """Human player whose moves are typed on standard input.

    Unlike the search-based player, this class only tracks the current
    game state; it keeps no search tree.
    """

    def __init__(self, index, state):
        # index is the side this player moves as; my_action() checks it
        # against the state's next_to_move.
        self.move_index = index
        self._state = state

    @property
    def state(self):
        """Current game state as seen by this player."""
        return self._state

    def my_action(self):
        """Prompt until the user enters a move the game state accepts.

        The input line is split on whitespace and its first two integers
        are passed, together with this player's index, to TicTacToeMove.
        Any input that cannot be turned into a move is rejected with a
        'try again...' message and the prompt is repeated.
        """
        assert self.move_index == self._state.next_to_move
        while True:
            # Read outside the try block so that errors from input() itself
            # are never mistaken for a badly typed move.
            line = input('\nwait for your input: ')
            try:
                cords = [int(e) for e in line.split()]
                # Fewer than two numbers raises IndexError here; it used to
                # escape the handler and abort the whole game.
                move = TicTacToeMove(cords[0], cords[1], self.move_index)
                # NOTE(review): state.move() presumably raises ValueError
                # for an illegal move -- the retry loop relies on that.
                self._state = self._state.move(move)
            except (ValueError, IndexError):
                print('try again...')
                continue
            break

    def other_action(self, state):
        """Record the state produced by the opponent's move."""
        self._state = state
# Init: show the empty starting board.
print_board(current.state.board)

# Gaming: a human plays side 1 against the search-based player on side -1.
# For self-play, use instead:
#   player = [Player(1, initial_state), Player(-1, initial_state)]
player = [RealPlayer(1, initial_state), Player(-1, initial_state)]
state = initial_state
while not state.is_game_over():
    # Rotate the queue: the player at the front moves, then goes to the back.
    current_player = player.pop(0)
    player.append(current_player)

    current_player.my_action()
    state = current_player.state

    # next_to_move has already flipped, so negate it to name the mover.
    mover = coded(-1 * state.next_to_move)
    print('\nPlayer[{}] move:'.format(mover))
    print_board(state.board)

    # Tell the player who moves next about the new position.
    player[0].other_action(state)

# Result: a game_result of 0 is reported as a draw; otherwise the side that
# made the last move is announced as the winner.
res = state.game_result
if res != 0:
    print('\nPlayer[{}] wins'.format(coded(-1 * state.next_to_move)))
else:
    print('\nNobody wins')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment