Skip to content

Instantly share code, notes, and snippets.

@sasaco
Last active March 23, 2018 06:02
Show Gist options
  • Save sasaco/9c276210bc71310a45e1418f2b3116b0 to your computer and use it in GitHub Desktop.
Save sasaco/9c276210bc71310a45e1418f2b3116b0 to your computer and use it in GitHub Desktop.
機械学習の理論を理解せずに tensorflow で オセロ AI を作ってみた 〜実装編〜 ref: https://qiita.com/sasaco/items/fdb9771c146cb877b183
def Q_values(self, state):
    """Return Q(state, action) for every action as a 1-D array.

    Feeds a single-state batch through the network's output tensor and
    strips the batch dimension from the result.
    """
    feed = {self.x: [state]}                  # batch of exactly one state
    batch_out = self.sess.run(self.y, feed_dict=feed)
    return batch_out[0]                       # unwrap the batch dimension
def select_action(self, state, targets, epsilon):
    """Epsilon-greedy choice among the legal moves *targets*.

    With probability *epsilon*, explore: return a uniformly random legal
    move.  Otherwise exploit: return the legal move whose Q-value under
    the current network is highest.
    """
    explore = np.random.rand() <= epsilon
    if explore:
        return np.random.choice(targets)
    # greedy: max_action Q(state, action) restricted to legal moves
    _best_q, best_move = self.select_enable_action(state, targets)
    return best_move
# For the given board (state), return the maximum Q-value among the legal
# squares (targets) together with the square index that achieves it.
def select_enable_action(self, state, targets):
    """Pick the legal move with the highest Q-value.

    Returns (qvalue, action).  Assumes *targets* is non-empty and every
    target is a valid index into the Q-value vector; otherwise the
    fallback move is simply the last one examined.
    """
    q_all = self.Q_values(state)
    # move indices ordered by Q-value, best first
    ranked = np.argsort(q_all)[::-1]
    for move in ranked:
        if move in targets:
            break
    # max_action Q(state, action)
    return q_all[move], move
# NOTE(review): scrape fragment — a duplicated excerpt of the epoch-end code
# that also appears inside the main loop below; indentation was lost in the
# paste, so the free names (players, env, e, n_epochs) come from the full file.
# Save only player 2's model (the second mover).
players[1].save_model()
# Outcome after the moves were executed.
terminal = env.isEnd()
w = env.winner()
print("EPOCH: {:03d}/{:03d} | WIN: player{:1d}".format(
e, n_epochs, w))
# Save only player 2's model (the second mover).
players[1].save_model()
# NOTE(review): duplicated fragment of the training-loop header; the loop
# body's indentation was stripped by the paste.
for e in range(n_epochs):
# Reset the board at the start of each epoch (one game/episode).
env.reset()
terminal = False
for e in range(n_epochs):
# Reset the board at the start of each epoch (one game/episode).
env.reset()
terminal = False
while terminal == False: # loop until the episode (one game) ends
for i in range(0, len(players)):
state = env.screen
targets = env.get_enables(playerID[i])
if len(targets) > 0:
# there is at least one legal square to place a stone on
# <- this is where all of the moves described earlier are "stored in D"
# choose an action (epsilon-greedy over the legal moves)
action = players[i].select_action(state, targets, players[i].exploration)
# execute the action on the board
env.update(action, playerID[i])
# outcome after the action was executed
terminal = env.isEnd()
w = env.winner()
print("EPOCH: {:03d}/{:03d} | WIN: player{:1d}".format(
e, n_epochs, w))
# Save only player 2's model (the second mover).
players[1].save_model()
# NOTE(review): the four statements below duplicate the epoch-end code just
# above — an artifact of the paste.
# outcome after the action was executed
terminal = env.isEnd()
w = env.winner()
print("EPOCH: {:03d}/{:03d} | WIN: player{:1d}".format(
e, n_epochs, w))
# Save only player 2's model (the second mover).
players[1].save_model()
self.enable_actions[0..63]  (the 64 board-square indices the agent may choose from)
# Training hyper-parameters.
n_epochs = 1000
# Environment: the Othello/Reversi board the agents play on.
env = Reversi()
# Stone colour assigned to each player slot per turn order.
playerID = [env.Black, env.White, env.Black]
# DQN agents, one per player slot.
players = []
# player[0] = env.Black (first mover)
players.append(DQNAgent(env.enable_actions, env.name, env.screen_n_rows, env.screen_n_cols))
# player[1] = env.White (second mover — the model the loop above saves)
players.append(DQNAgent(env.enable_actions, env.name, env.screen_n_rows, env.screen_n_cols))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment