Skip to content

Instantly share code, notes, and snippets.

@macrat
Created May 20, 2018 05:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save macrat/f36bd1571415ef9905c3c38a89fae8af to your computer and use it in GitHub Desktop.
Save macrat/f36bd1571415ef9905c3c38a89fae8af to your computer and use it in GitHub Desktop.
chainerでマルバツゲーム。失敗作。
import numpy
import chainer
import chainer.links as L
import chainer.functions as F
def judge(board):
    """Return the winner of a tic-tac-toe position.

    Cells hold 1.0 for 'o', 0.0 for 'x' and 0.5 for an empty square.
    The board may be given in any shape that reshapes to (3, 3).

    Returns 1 if 'o' owns a complete line, -1 if 'x' does, and 0 when
    nobody has three in a row. Lines are examined in the order
    row 0, column 0, row 1, column 1, row 2, column 2, main diagonal,
    anti-diagonal; when several lines are complete the last one decides.

    >>> judge(numpy.array([[ 1,  1,  1],
    ...                    [.5,  0, .5],
    ...                    [.5,  0, .5]]))
    1
    >>> judge(numpy.array([[ 0, .5, .5],
    ...                    [.5,  0, .5],
    ...                    [ 1, .5,  0]]))
    -1
    >>> judge(numpy.array([[ 1,  0,  0],
    ...                    [.5,  0, .5],
    ...                    [ 0, .5, .5]]))
    -1
    >>> judge(numpy.array([[.5,  1, .5],
    ...                    [ 0,  1, .5],
    ...                    [.5,  1,  0]]))
    1
    """
    grid = board.reshape((3, 3))

    # Collect the eight candidate lines in the order they are checked.
    lines = []
    for k in range(3):
        lines.append(grid[k])
        lines.append(grid[:, k])
    lines.append(grid[[0, 1, 2], [0, 1, 2]])  # main diagonal
    lines.append(grid[[2, 1, 0], [0, 1, 2]])  # anti-diagonal

    winner = 0
    for line in lines:
        if (line == 1.0).all():
            winner = 1
        elif (line == 0.0).all():
            winner = -1
    return winner
def print_board(board):
    """Print the board as three text rows.

    Each cell is rendered as 'o' (1.0), 'x' (0.0) or a blank (0.5), and
    every cell is surrounded by ':' separators. The board may be given
    in any shape that reshapes to (3, 3). A cell holding any other value
    raises KeyError.
    """
    symbols = {0.0: 'x', 0.5: ' ', 1.0: 'o'}
    for cells in board.reshape([3, 3]):
        marks = [symbols[cell] for cell in cells]
        print(':' + ':'.join(marks) + ':')
def random_put(board, turn):
    """Place ``turn`` on one uniformly chosen empty cell of ``board``.

    The board is modified in place. Empty cells are those equal to 0.5.
    If the board has no empty cell it is left untouched.

    Args:
        board: NumPy array of cell values; any shape is accepted because
            cells are addressed through flat indices.
        turn: Value to write into the chosen cell (e.g. 1.0 or 0.0).

    Returns:
        None.
    """
    # Pick directly among the empty cells instead of retrying random
    # positions until an empty one happens to be hit.
    empty = numpy.flatnonzero(board == 0.5)
    if empty.size == 0:
        return
    board.flat[numpy.random.choice(empty)] = turn
class TicTacToeModel(chainer.Chain):
    """Small fully connected network that scores tic-tac-toe cells.

    Boards are flat float32 vectors of nine cells: 1.0 is 'o', 0.0 is
    'x' and 0.5 is an empty cell. The network maps a board to nine
    scores, one per cell, and the empty cell with the lowest score is
    played.
    """

    def __init__(self):
        super().__init__(
            l1=L.Linear(9, 9),
            l2=L.Linear(9, 9),
            l3=L.Linear(9, 9),
            l4=L.Linear(9, 9),
        )
        self.optimizer = chainer.optimizers.AdaDelta()
        self.optimizer.setup(self)

    def _predict(self, states):
        """Run the network on one or more boards.

        ``states`` is reshaped to (-1, 9), so it may be a single board
        or a batch. Returns the output of the last layer after ReLU,
        one row of nine scores per board.
        """
        h = F.relu(self.l1(states.reshape((-1, 9))))
        h = F.relu(self.l2(h))
        h = F.relu(self.l3(h))
        h = F.relu(self.l4(h))
        return h

    def predict(self, states):
        """Return the scores for a single board as a (3, 3) array."""
        return self._predict(states).data.reshape((3, 3))

    def _choose_move(self, board):
        """Return the index of the empty cell with the lowest score.

        Ties are broken in favour of the lowest index. The caller must
        make sure at least one cell is empty.
        """
        scores = self._predict(board).data.reshape([9])
        # Occupied cells are masked with +inf so they can never be
        # selected, whatever the network predicts for them.
        # NOTE(review): the original iterated scores in ascending order,
        # so "lowest score wins" is preserved here -- confirm that
        # lowest (rather than highest) is the intended preference.
        masked = numpy.where(board == 0.5, scores, numpy.inf)
        return int(masked.argmin())

    def fit(self):
        """Play one self-play game from a random opening, then update.

        The opening consists of 0 to 3 random moves. After that the
        network plays both sides until someone wins or the board is
        full, printing the board before every move and once at the end.
        """
        # The all-zero board is judged as a win for 'x', which forces
        # the loop below to run at least once and build a real opening.
        board = numpy.zeros([9], dtype=numpy.float32)
        while judge(board) != 0:
            board = numpy.ones([9], dtype=numpy.float32) / 2
            turn = 1
            for _ in range(numpy.random.randint(0, 4)):
                random_put(board, turn)
                turn = int(not turn)

        # Snapshots must be real copies: slicing a NumPy array yields a
        # view that would keep changing as the game goes on.
        log = [board.copy()]
        wants = [turn]
        while judge(board) == 0 and numpy.any(board == 0.5):
            print_board(board)
            print()

            board[self._choose_move(board)] = turn
            log.append(board.copy())
            wants.append(turn)

            turn = int(not turn)
        print_board(board)

        # Map the outcome -1/0/1 to 0/0.5/1, one target per logged state.
        result = numpy.array([(judge(board) + 1) / 2] * len(log),
                             dtype=numpy.float32)
        # NOTE(review): both operands of this loss are plain NumPy
        # arrays built from the game outcome and the turn order; neither
        # comes from self._predict(), so it looks like no gradient
        # reaches the network parameters. TODO confirm and connect the
        # loss to the predictions for the logged states.
        loss = F.mean_squared_error(result,
                                    numpy.array(wants, dtype=numpy.float32))
        self.zerograds()
        loss.backward()
        self.optimizer.update()

    def do_game(self, enemy_func):
        """Play one game as 'o' against ``enemy_func`` and report it.

        Args:
            enemy_func: Callable taking the flat board; it is expected
                to place the opponent's mark on the board in place.

        Returns:
            The final flat board. The board is printed before every
            move of the network and at the end, followed by
            'won!!!', 'lose...' or 'draw'.
        """
        board = numpy.ones([9], dtype=numpy.float32) / 2
        while judge(board) == 0 and numpy.any(board == 0.5):
            print_board(board)
            print()

            board[self._choose_move(board)] = 1

            # The opponent only replies while the game is still open;
            # otherwise a move made after the network has already won
            # could change the judged result.
            if judge(board) == 0:
                enemy_func(board)
        print_board(board)

        outcome = judge(board)
        if outcome == 1:
            print('won!!!')
        elif outcome == -1:
            print('lose...')
        else:
            print('draw')
        print()
        return board
if __name__ == '__main__':
    model = TicTacToeModel()

    # Training phase: 1000 self-play games, each followed by a separator.
    for _ in range(1000):
        model.fit()
        print('', '-------', '', sep='\n')

    # Evaluation phase: play against an opponent that moves at random.
    num = 100
    outcomes = []
    for _ in range(num):
        final_board = model.do_game(lambda board: random_put(board, 0.0))
        outcomes.append(judge(final_board))
        print('-------')

    win = outcomes.count(1)
    lose = outcomes.count(-1)
    print('win: {:.2%}, draw: {:.2%}, lose: {:.2%}'.format(win/num, (num-win-lose)/num, lose/num))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment