Skip to content

Instantly share code, notes, and snippets.

@masouduut94
Created August 27, 2023 21:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save masouduut94/6254555dac7f04736affd939d24abb34 to your computer and use it in GitHub Desktop.
Save masouduut94/6254555dac7f04736affd939d24abb34 to your computer and use it in GitHub Desktop.
from math import sqrt, log
from copy import deepcopy
from random import choice, random
from time import clock
from gamestate import GameState
from uct_mcstsagent import Node, UctMctsAgent
from meta import *
class RaveNode(Node):
    """
    Tree node whose value blends the UCT estimate with an AMAF (RAVE)
    estimate.

    This class assigns no attributes itself; ``value`` reads ``N``, ``Q``,
    ``N_RAVE``, ``Q_RAVE`` and ``parent`` from the instance, so the base
    ``Node`` is expected to provide them.
    """

    def __init__(self, move=None, parent=None):
        """
        Initialize a new node with an optional move and parent.

        All state is set up by ``Node.__init__``; nothing extra is stored
        here.
        """
        super().__init__(move, parent)

    @property
    def value(self, explore: float = MCTSMeta.EXPLORATION, rave_const: float = MCTSMeta.RAVE_CONST) -> float:
        """
        Calculate the RAVE-weighted UCT value of this node relative to its
        parent.

        "explore" specifies how much the value should favor nodes that have
        yet to be thoroughly explored versus nodes that seem to have a high
        win rate. "rave_const" controls how long the AMAF estimate keeps
        influencing the result: its weight ``alpha`` shrinks linearly from 1
        (no visits) to 0 (``N >= rave_const``).

        NOTE: because this is declared as a property, attribute access
        invokes it with the instance only, so "explore" and "rave_const"
        always take their default values (MCTSMeta.EXPLORATION and
        MCTSMeta.RAVE_CONST).

        Returns:
            0 for an unvisited node when explore is zero, GameMeta.INF for
            an unvisited node otherwise, and
            ``(1 - alpha) * UCT + alpha * AMAF`` for a visited node.
        """
        # Unless explore is zero, maximally favor unexplored nodes.
        # Compare by value: an identity test against the literal 0 would
        # miss a float 0.0.
        if self.N == 0:
            return 0 if explore == 0 else GameMeta.INF

        # Weight of the AMAF term; clamped so it never goes negative once
        # the node has been visited more than rave_const times.
        alpha = max(0, (rave_const - self.N) / rave_const)
        UCT = self.Q / self.N + explore * sqrt(2 * log(self.parent.N) / self.N)
        # No AMAF samples yet -> contribute nothing rather than divide by 0.
        AMAF = self.Q_RAVE / self.N_RAVE if self.N_RAVE != 0 else 0
        return (1 - alpha) * UCT + alpha * AMAF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment