Last active
March 23, 2018 06:02
-
-
Save sasaco/9c276210bc71310a45e1418f2b3116b0 to your computer and use it in GitHub Desktop.
機械学習の理論を理解せずに tensorflow で オセロ AI を作ってみた 〜実装編〜 ref: https://qiita.com/sasaco/items/fdb9771c146cb877b183
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def Q_values(self, state):
    """Return Q(state, action) for every action in the given state.

    Evaluates ``self.y`` through ``self.sess.run`` with ``self.x`` fed a
    batch containing only ``state``, and returns the first (and only) row
    of the result.

    Args:
        state: A single board state; it is wrapped in a one-element list
            before being fed to ``self.x``.

    Returns:
        The first element of the value produced for ``self.y``.
    """
    # NOTE(review): presumably sess is a TensorFlow session and x / y are
    # the network's input placeholder and output tensor -- confirm against
    # the model-construction code.
    batch = [state]
    return self.sess.run(self.y, feed_dict={self.x: batch})[0]
def select_action(self, state, targets, epsilon):
    """Choose an action with an epsilon-greedy policy.

    Args:
        state: Current board state, passed on to
            ``select_enable_action`` for the greedy choice.
        targets: Candidate actions the move must be chosen from.
        epsilon: Exploration threshold. A random candidate is returned
            when ``np.random.rand() <= epsilon``.

    Returns:
        Either a random element of ``targets`` (exploration) or the action
        reported by ``select_enable_action`` (exploitation).
    """
    if np.random.rand() <= epsilon:
        # Explore: pick one of the candidate actions at random.
        return np.random.choice(targets)

    # Exploit: take the candidate with the highest Q(state, action).
    _, action = self.select_enable_action(state, targets)
    return action
def select_enable_action(self, state, targets):
    """Return the best Q-value and its action among the allowed moves.

    For the given board ``state``, the action indices are ranked by their
    Q-value and the highest-ranked index that also appears in ``targets``
    (the places where a piece may be put) is selected.

    Args:
        state: Board state handed to ``Q_values``.
        targets: Collection of action indices that are allowed.

    Returns:
        A ``(qvalue, action)`` tuple: the Q-value of the selected action
        and the action index itself.
    """
    Qs = self.Q_values(state)

    # Action indices ordered from the highest Q-value to the lowest.
    ranked = np.argsort(Qs)[::-1]
    for action in ranked:
        if action in targets:
            break
    # NOTE(review): when no index is contained in ``targets`` (e.g. an
    # empty ``targets``) the loop runs to completion and ``action`` is the
    # lowest-ranked index. This fall-through is kept as-is because callers
    # outside this view may depend on it -- confirm whether it is intended.

    return Qs[action], action
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Only player 2 (the one who moves second) has its model saved.
players[1].save_model()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): excerpt from the training script; the original nesting of
# these statements inside the epoch / episode loops was lost -- confirm the
# indentation against the full script before reuse.

# Result of executing the action.
terminal = env.isEnd()
w = env.winner()
print("EPOCH: {:03d}/{:03d} | WIN: player{:1d}".format(e, n_epochs, w))

# Only player 2 (the one who moves second) has its model saved.
players[1].save_model()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for e in range(n_epochs):
    # Start every epoch from a freshly reset environment.
    env.reset()
    terminal = False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for e in range(n_epochs):
    # Start every epoch from a freshly reset environment.
    env.reset()
    terminal = False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): excerpt -- nothing in this fragment updates ``terminal``;
# presumably the rest of the loop body follows in the full script.
while terminal == False:  # loop until one episode has finished
    for i, player in enumerate(players):
        state = env.screen
        targets = env.get_enables(playerID[i])
        if len(targets) > 0:
            # There is at least one place where a piece can be put.
            # <- (elided) this is where every candidate move described
            #    earlier is "saved to D".

            # Choose an action.
            action = player.select_action(state, targets, player.exploration)
            # Execute the action.
            env.update(action, playerID[i])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): excerpt from the training script; the original nesting of
# these statements inside the epoch / episode loops was lost -- confirm the
# indentation against the full script before reuse.

# Result of executing the action.
terminal = env.isEnd()
w = env.winner()
print("EPOCH: {:03d}/{:03d} | WIN: player{:1d}".format(e, n_epochs, w))

# Only player 2 (the one who moves second) has its model saved.
players[1].save_model()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): excerpt from the training script; the original nesting of
# these statements inside the epoch / episode loops was lost -- confirm the
# indentation against the full script before reuse.

# Result of executing the action.
terminal = env.isEnd()
w = env.winner()
print("EPOCH: {:03d}/{:03d} | WIN: player{:1d}".format(e, n_epochs, w))

# Only player 2 (the one who moves second) has its model saved.
players[1].save_model()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
self.enable_actions[0~63]  # pseudo-notation (not valid Python): entries 0 through 63 of self.enable_actions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parameters.
n_epochs = 1000

# Environment.
env = Reversi()

# Player IDs, indexed in step with ``players``.
# NOTE(review): the third entry repeats env.Black; presumably so that
# playerID[i + 1] stays valid for the last player -- confirm against the
# full training loop.
playerID = [env.Black, env.White, env.Black]

# Player agents: players[0] plays env.Black, players[1] plays env.White.
players = [
    DQNAgent(env.enable_actions, env.name, env.screen_n_rows, env.screen_n_cols)
    for _ in range(2)
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment