carlos-aguayo/MCTS.py

## MCTS.py
# https://github.com/suragnair/alpha-zero-general/blob/5156c7fd1d2f3e5fefe732a4b2e0ffc5b272f819/MCTS.py#L105-L121
# Choose the valid action whose upper confidence bound is largest, then
# descend one level into the state that action leads to.
cur_best = -float('inf')
best_act = -1

for a in range(self.game.getActionSize()):
    # Actions masked out by `valids` never compete.
    if not valids[a]:
        continue

    edge = (s, a)
    prior_term = self.args.cpuct * self.Ps[s][a]
    if edge in self.Qsa:
        # A Q-value is stored for this edge: Q plus the exploration bonus,
        # which shrinks as the edge's own visit count grows.
        bonus = prior_term * math.sqrt(self.Ns[s]) / (1 + self.Nsa[edge])
        u = self.Qsa[edge] + bonus
    else:
        # No Q-value stored for this edge: only the prior-weighted term is used.
        u = prior_term * math.sqrt(self.Ns[s] + EPS)  # Q = 0 ?

    # Strict comparison, so the lowest-indexed action wins ties.
    if u > cur_best:
        cur_best, best_act = u, a

# NOTE(review): best_act is still -1 here if no entry of `valids` was truthy;
# presumably the caller guarantees at least one valid move - confirm.
a = best_act
next_s, next_player = self.game.getNextState(canonicalBoard, 1, a)
next_s = self.game.getCanonicalForm(next_s, next_player)

# Recursively visit the node
v = self.search(next_s)
	# https://github.com/suragnair/alpha-zero-general/blob/5156c7fd1d2f3e5fefe732a4b2e0ffc5b272f819/MCTS.py#L105-L121
	# Track the best score seen so far and the action that produced it.
	# best_act stays -1 if no action is marked valid.
	cur_best = -float('inf')
	best_act = -1

	# pick the action with the highest upper confidence bound
	for a in range(self.game.getActionSize()):
		if valids[a]:
			if (s, a) in self.Qsa:
				# A Q-value is stored for this edge: Q plus an exploration
				# bonus that shrinks as the edge's visit count Nsa grows.
				u = self.Qsa[(s, a)] + self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s]) / (
					1 + self.Nsa[(s, a)])
			else:
				# No Q-value stored for this edge: only the prior-weighted
				# term is used. EPS is presumably a small constant that keeps
				# the term nonzero when Ns[s] is 0 - confirm at its definition.
				u = self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s] + EPS)  # Q = 0 ?

			# Strict comparison, so the lowest-indexed action wins ties.
			if u > cur_best:
				cur_best = u
				best_act = a

	a = best_act
	# The literal 1 is passed as the second argument (presumably the player
	# marker for a canonical board - confirm against the game's getNextState).
	next_s, next_player = self.game.getNextState(canonicalBoard, 1, a)
	next_s = self.game.getCanonicalForm(next_s, next_player)

	# Recursively visit the node
	v = self.search(next_s)