Created
May 20, 2018 05:53
-
-
Save macrat/f36bd1571415ef9905c3c38a89fae8af to your computer and use it in GitHub Desktop.
chainerでマルバツゲーム。失敗作。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
import chainer | |
import chainer.links as L | |
import chainer.functions as F | |
def judge(board):
    """Report which side, if any, owns a completed line on the board.

    ``board`` is anything that reshapes to 3x3; a cell is 1.0 for 'o',
    0.0 for 'x' and 0.5 when empty.  The result is 1 for a full line of
    1.0, -1 for a full line of 0.0, and 0 when no line is complete.

    If several lines are complete, the one that comes last in the scan
    order (row 0, column 0, row 1, column 1, row 2, column 2, main
    diagonal, anti-diagonal) decides the result.

    >>> judge(numpy.array([[ 1,  1,  1],
    ...                    [.5,  0, .5],
    ...                    [.5,  0, .5]]))
    1
    >>> judge(numpy.array([[ 0, .5, .5],
    ...                    [.5,  0, .5],
    ...                    [ 1, .5,  0]]))
    -1
    >>> judge(numpy.array([[ 1,  0,  0],
    ...                    [.5,  0, .5],
    ...                    [ 0, .5, .5]]))
    -1
    >>> judge(numpy.array([[.5,  1, .5],
    ...                    [ 0,  1, .5],
    ...                    [.5,  1,  0]]))
    1
    """
    grid = board.reshape((3, 3))

    # Collect the eight lines in scan order.
    lines = []
    for k in range(3):
        lines.append(grid[k])
        lines.append(grid[:, k])
    lines.append(grid[[0, 1, 2], [0, 1, 2]])  # main diagonal
    lines.append(grid[[2, 1, 0], [0, 1, 2]])  # anti-diagonal

    # Walking backwards and stopping at the first completed line gives the
    # same answer as walking forwards and keeping the last one.
    for line in reversed(lines):
        if numpy.all(line == 1.0):
            return 1
        if numpy.all(line == 0.0):
            return -1
    return 0
def print_board(board):
    """Print the board as three lines of the form ``:a:b:c:``.

    ``board`` must reshape to 3x3.  Cells equal to 1.0 are shown as 'o',
    0.0 as 'x' and 0.5 as a blank; any other value raises ``KeyError``.
    """
    symbols = {0.0: 'x', 0.5: ' ', 1.0: 'o'}
    for cells in board.reshape([3, 3]):
        marks = [symbols[cell] for cell in cells]
        print(':' + ':'.join(marks) + ':')
def random_put(board, turn):
    """Write ``turn`` into one uniformly chosen empty cell of ``board``.

    ``board`` is a flat, length-9 array modified in place; empty cells
    hold 0.5.  When no cell is empty the board is left untouched.
    """
    if not numpy.any(board == 0.5):
        return

    # Rejection sampling: redraw until the drawn index is an empty cell.
    pos = numpy.random.randint(0, 9)
    while board[pos] != 0.5:
        pos = numpy.random.randint(0, 9)
    board[pos] = turn
class TicTacToeModel(chainer.Chain):
    """Small fully connected network that scores the nine board cells.

    Boards are flat float32 vectors of length 9 using the encoding shared
    with ``judge``, ``print_board`` and ``random_put``: 1.0 is 'o',
    0.0 is 'x' and 0.5 is an empty cell.
    """

    def __init__(self):
        super().__init__(
            l1=L.Linear(9, 9),
            l2=L.Linear(9, 9),
            l3=L.Linear(9, 9),
            l4=L.Linear(9, 9),
        )
        self.optimizer = chainer.optimizers.AdaDelta()
        self.optimizer.setup(self)

    def _predict(self, states):
        """Run the network on ``states`` (reshaped to ``(-1, 9)``).

        Returns the output of the last ReLU layer, one row of nine
        scores per input board.
        """
        h = F.relu(self.l1(states.reshape((-1, 9))))
        h = F.relu(self.l2(h))
        h = F.relu(self.l3(h))
        h = F.relu(self.l4(h))
        return h

    def predict(self, states):
        """Return the scores for a single board as a 3x3 array."""
        return self._predict(states).data.reshape((3, 3))

    def _choose_move(self, board):
        """Return the index of the empty cell with the lowest score.

        Only empty cells (value 0.5) are considered; ties go to the lowest
        index.  The caller must guarantee that at least one cell is empty.

        The previous inline search compared every score against the whole
        board and relied on ``argmax`` of a boolean mask, which yields 0
        for an all-False mask; cell 0 could therefore be played even when
        its score was not the one being examined.
        """
        scores = self._predict(board).data.reshape([9])
        empty = numpy.flatnonzero(board == 0.5)
        return int(empty[scores[empty].argmin()])

    def fit(self):
        """Play one self-play game from a random opening, then run one
        optimizer step.

        The board is printed before every move and once at the end.
        """
        # Random opening of 0-3 alternating moves; redraw in the (in
        # practice unreachable) case that it is already decided.
        while True:
            board = numpy.ones([9], dtype=numpy.float32) / 2
            turn = 1
            for _ in range(numpy.random.randint(0, 4)):
                random_put(board, turn)
                turn = int(not turn)
            if judge(board) == 0:
                break

        # Snapshots must be copies: ``board[:]`` is only a view and would
        # follow every later in-place move.
        log = [board.copy()]
        wants = [turn]

        while judge(board) == 0 and numpy.any(board == 0.5):
            print_board(board)
            print()

            board[self._choose_move(board)] = turn
            log.append(board.copy())
            wants.append(turn)

            turn = int(not turn)  # alternate between 1 ('o') and 0 ('x')

        print_board(board)

        # Map the outcome (-1, 0, 1) onto (0, 0.5, 1) and compare it with
        # the side that moved at each recorded step.
        result = numpy.array([(judge(board) + 1) / 2] * len(log),
                             dtype=numpy.float32)
        # NOTE(review): both operands are constant arrays that do not
        # depend on the network output, so no gradient reaches the
        # parameters and the update below does not train the model.  A
        # loss built from ``_predict`` outputs is needed for learning.
        loss = F.mean_squared_error(result,
                                    numpy.array(wants, dtype=numpy.float32))

        self.cleargrads()  # replaces the deprecated zerograds()
        loss.backward()
        self.optimizer.update()

    def do_game(self, enemy_func):
        """Play one game as 'o' (moving first) against ``enemy_func``.

        ``enemy_func(board)`` must place the opponent's mark on the board
        in place.  The board is printed before each model move and at the
        end, followed by the verdict.  Returns the final board.
        """
        board = numpy.ones([9], dtype=numpy.float32) / 2

        while judge(board) == 0 and numpy.any(board == 0.5):
            print_board(board)
            print()

            board[self._choose_move(board)] = 1

            # Do not let the opponent answer a move that already ended
            # the game (win or full board).
            if judge(board) != 0 or not numpy.any(board == 0.5):
                break

            enemy_func(board)

        print_board(board)
        outcome = judge(board)
        if outcome == 1:
            print('won!!!')
        elif outcome == -1:
            print('lose...')
        else:
            print('draw')
        print()

        return board
if __name__ == '__main__':
    # ``model`` stays a module-level name: the training and playing code
    # of TicTacToeModel looks it up as a global.
    model = TicTacToeModel()

    # Training phase: 1000 self-play games, each followed by a separator.
    for _ in range(1000):
        model.fit()

        print()
        print('-------')
        print()

    # Evaluation phase: play against an opponent that moves at random and
    # count the outcomes by the value judge() reports for the final board.
    num = 100
    tally = {1: 0, 0: 0, -1: 0}
    for _ in range(num):
        final_board = model.do_game(lambda board: random_put(board, 0.0))
        outcome = judge(final_board)
        print('-------')
        tally[outcome] += 1

    print('win: {:.2%}, draw: {:.2%}, lose: {:.2%}'.format(
        tally[1] / num, tally[0] / num, tally[-1] / num))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment