Skip to content

Instantly share code, notes, and snippets.

@masouduut94
Last active October 23, 2020 14:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save masouduut94/7d77b0ce837bb39d2b5140543e60f568 to your computer and use it in GitHub Desktop.
Save masouduut94/7d77b0ce837bb39d2b5140543e60f568 to your computer and use it in GitHub Desktop.
class Node:
    """
    Node for the MCTS. Stores the move applied to reach this node from its
    parent, stats for the associated game position, children, parent and
    outcome.

    Attributes:
        move (tuple): move applied to the parent position to reach this node.
        parent (Node): parent node, or None when the node has no parent.
        N (int): times this position was visited.
        Q (int): accumulated reward (wins-losses, per the original author's
            note) from this position; ``value`` divides it by ``N`` to
            obtain the average.
        Q_RAVE (int): RAVE reward statistic; initialised to 0 and not used
            inside this class.
        N_RAVE (int): RAVE visit statistic; initialised to 0 and not used
            inside this class.
        children (dict): child nodes keyed by the move that leads to them.
        outcome (int): initialised to ``GameMeta.PLAYERS['none']``; expected
            to be set by the search code to the winner when this position
            ends the game.
    """

    def __init__(self, move: tuple = None, parent: "Node | None" = None):
        """
        Initialize a new node with optional move and parent, an initially
        empty children dict, zeroed rollout statistics and an unspecified
        outcome.
        """
        self.move = move
        self.parent = parent
        self.N = 0  # times this position was visited
        self.Q = 0  # accumulated reward; averaged over N in ``value``
        self.N_RAVE = 0
        self.Q_RAVE = 0
        self.children = {}
        self.outcome = GameMeta.PLAYERS['none']

    def add_children(self, children: "list | dict") -> None:
        """
        Add nodes to the children of this node, keyed by each node's move.

        Args:
            children: an iterable of nodes (e.g. a list). A dict whose
                values are nodes is accepted as well; its values are added.
        """
        # Iterating a dict directly would yield its keys, which have no
        # ``move`` attribute, so take the values in that case.
        nodes = children.values() if isinstance(children, dict) else children
        for child in nodes:
            self.children[child.move] = child

    @property
    def value(self, explore: float = MCTSMeta.EXPLORATION):
        """
        Calculate the UCT value of this node relative to its parent. The
        parameter "explore" specifies how much the value should favor nodes
        that have yet to be thoroughly explored versus nodes that seem to
        have a high win rate.

        Because this is a property, attribute access (``node.value``) cannot
        pass arguments, so "explore" always takes its default,
        ``MCTSMeta.EXPLORATION``.

        Returns:
            ``GameMeta.INF`` for an unvisited node (0 when explore is 0), so
            that unvisited nodes are tried first; otherwise the average
            reward plus the exploration bonus. When the node has no parent,
            or the parent has no recorded visits, the exploration bonus is
            undefined and only the average reward is returned.
        """
        # Nodes with no visits get priority over every visited sibling.
        if self.N == 0:
            return 0 if explore == 0 else GameMeta.INF

        exploitation = self.Q / self.N
        parent = self.parent
        # Guard against a missing parent (root) and against log(0).
        if explore == 0 or parent is None or parent.N < 1:
            return exploitation
        return exploitation + explore * sqrt(2 * log(parent.N) / self.N)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment