# thunderInfy/CR_MCTS_expand_and_evaluate.py

## CR_MCTS_expand_and_evaluate.py
def expand_and_evaluate(self, parent, action, child):
    """Expand ``child`` (reached from ``parent`` via ``action``) and score it.

    When ``child.win`` is still ``None`` the controller is asked for the
    state that follows ``parent.state`` under ``action``.  A non-terminal
    result gets its state, action validity and edges set up and is scored
    with ``child.get_value()``.  A terminal result has the winner stored in
    ``child.win`` and is scored -1 when the winner equals the parent's
    ``'player_turn'`` (the parent won, so the child lost) and +1 otherwise.
    A child whose ``win`` is already set is scored by the same rule without
    consulting the controller.

    The score is added to ``child.W`` and ``child.N`` is incremented.

    Returns:
        The negated score.  A value is expressed from the point of view of
        the player to move at that node; the adjacent nodes belong to the
        opposite player, so the sign is flipped before passing it back.
    """
    outcome = child.win
    successor_view = None

    if outcome is None:
        # Either a non-terminal node or a terminal node seen for the first
        # time: the controller (which knows the rules) resolves which one.
        successor, outcome = self.controller.get_next_state(parent.state, action)
        successor_view = successor.get_array_view()
        if outcome is not None:
            # Remember the winner so later visits can skip the controller.
            child.win = outcome

    if outcome is None:
        # Non-terminal: set up the node and read its value.
        child.state = successor_view
        child.set_action_validity()
        child.initialize_edges()
        score = child.get_value()
    elif parent.state['player_turn'] == outcome:
        # The player who moved at the parent won, hence the child lost.
        score = -1
    else:
        score = 1

    # Update the statistics of the evaluated node.
    child.W += score
    child.N += 1

    return -score
	def expand_and_evaluate(self, parent, action, child):
		"""Expand ``child`` (reached from ``parent`` via ``action``) and score it.

		Adds the computed value to ``child.W``, increments ``child.N`` and
		returns the negated value.  A value is expressed from the point of
		view of the player to move at that node; the adjacent nodes belong to
		the opposite player, hence -1 * val is passed back.
		"""
		if child.win is None:
			# it's either a non-terminal node or a terminal node that hasn't
			# been visited before

			# using controller (which knows the rules of the game) to get the
			# next state
			next_state_obj, win = self.controller.get_next_state(parent.state, action)
			next_state = next_state_obj.get_array_view()
			if win is None:
				# not a terminal node
				child.state = next_state
				child.set_action_validity()
				child.initialize_edges()
				val = child.get_value()
			else:
				# it's a terminal node
				child.win = win
				# say win is 'red' and parent's turn was also red, that
				# means parent has won, which implies the child has lost,
				# i.e., value = -1 for the child
				val = -1 if parent.state['player_turn'] == win else 1
		else:
			# it's a terminal node whose winner was recorded on an earlier visit
			val = -1 if parent.state['player_turn'] == child.win else 1

		# updating the statistics for the newly expanded node
		child.W += val
		child.N += 1

		# If the current player has value +1 it would likely win, which
		# implies its children (and its parent, if it is not the root node),
		# which are the opposite player, would have value -1.
		return -val