masouduut94/uct_mctsagent.py

## uct_mctsagent.py
from math import sqrt, log
from copy import deepcopy
from random import choice, random
from time import clock

from gamestate import GameState
from uct_mcstsagent import Node, UctMctsAgent
from meta import *


class RaveNode(Node):
    """
    Search-tree node whose value blends a UCT score with a RAVE (AMAF) estimate.

    The counters read by `value` (N, Q, N_RAVE, Q_RAVE) and the `parent` link
    are not assigned in this class; presumably the Node base class sets them
    up -- confirm against Node.
    """

    def __init__(self, move=None, parent=None):
        """
        Initialize a new node with an optional move and parent.

        Both arguments are forwarded unchanged to the base-class initializer,
        which is responsible for all node state.
        """
        super().__init__(move, parent)

    @property
    def value(self, explore: float = MCTSMeta.EXPLORATION, rave_const: float = MCTSMeta.RAVE_CONST) -> float:
        """
        Return the RAVE-weighted UCT value of this node relative to its parent.

        "explore" controls how strongly rarely visited nodes are favored over
        nodes with a high win rate, and "rave_const" is the visit count at
        which the AMAF estimate stops contributing.

        NOTE: because this is a property, it is read as `node.value` and no
        arguments can be supplied, so both parameters always take the
        MCTSMeta defaults captured when the class was defined. The parameters
        are kept so the signature stays as it was.

        Returns 0 (explore == 0) or GameMeta.INF for a node that has never
        been visited; otherwise (1 - alpha) * UCT + alpha * AMAF.
        """
        # unless explore is zero, maximally favor unexplored nodes
        if self.N == 0:
            # compare by value: `is 0` tests object identity, which is False
            # for 0.0 and is flagged with a SyntaxWarning by modern Python
            return 0 if explore == 0 else GameMeta.INF

        # rave valuation: alpha falls linearly as visits grow and is clamped
        # at 0 once N reaches rave_const (for a positive rave_const)
        alpha = max(0, (rave_const - self.N) / rave_const)
        uct = self.Q / self.N + explore * sqrt(2 * log(self.parent.N) / self.N)
        # no AMAF samples yet -> contribute 0 instead of dividing by zero
        amaf = self.Q_RAVE / self.N_RAVE if self.N_RAVE != 0 else 0
        return (1 - alpha) * uct + alpha * amaf
	from math import sqrt, log
	from copy import deepcopy
	from random import choice, random
	from time import clock

	from gamestate import GameState
	from uct_mcstsagent import Node, UctMctsAgent
	from meta import *


	class RaveNode(Node):
	    """
	    Tree node that scores itself with a mix of UCT and RAVE (AMAF) statistics.

	    The attributes read by `value` (N, Q, N_RAVE, Q_RAVE, parent) are not
	    assigned here; presumably they come from the Node base class -- confirm
	    against Node.
	    """

	    def __init__(self, move=None, parent=None):
	        """
	        Initialize a new node with an optional move and parent.

	        The arguments are passed straight through to the base-class
	        initializer; this class adds no state of its own.
	        """
	        super().__init__(move, parent)

	    @property
	    def value(self, explore: float = MCTSMeta.EXPLORATION, rave_const: float = MCTSMeta.RAVE_CONST) -> float:
	        """
	        Return the RAVE-weighted UCT value of this node relative to its parent.

	        "explore" sets how much unexplored nodes are preferred over nodes
	        with a high win rate; "rave_const" is the visit count beyond which
	        only the UCT term remains.

	        NOTE: as a property this is accessed as `node.value`, so callers
	        cannot pass arguments and the MCTSMeta defaults are always used.
	        The parameters are retained to keep the signature unchanged.

	        Returns 0 (explore == 0) or GameMeta.INF when the node has no
	        visits; otherwise (1 - alpha) * UCT + alpha * AMAF.
	        """
	        # unless explore is zero, maximally favor unexplored nodes
	        if self.N == 0:
	            # value comparison; `is 0` is an identity test that fails for 0.0
	            return 0 if explore == 0 else GameMeta.INF

	        # rave valuation: weight of the AMAF term decays linearly with the
	        # visit count and never goes below 0
	        alpha = max(0, (rave_const - self.N) / rave_const)
	        exploitation = self.Q / self.N
	        exploration = explore * sqrt(2 * log(self.parent.N) / self.N)
	        uct = exploitation + exploration
	        # with no AMAF samples the estimate is taken as 0
	        amaf = self.Q_RAVE / self.N_RAVE if self.N_RAVE != 0 else 0
	        return (1 - alpha) * uct + alpha * amaf