# Backward passes of MCTS results are handled through recursion.
def selection(self, node, root=False, logging=False, actions=None):
    # Find the PUCT value for each child node and return the best child
    # (the one with the max PUCT value) along with the best action for the parent.
    best_child, best_action = self.select_best_child(node, root)

    # Logging only; not part of the core logic of this function.
    if logging:
        if actions is None:
            actions = []
        actions.append(best_action[0]*args.M + best_action[1])

    if best_child.state is None:
        # Either a terminal node or a node that has to be expanded.
        val = self.expand_and_evaluate(node, best_action, best_child)

        # Logging only; not part of the core logic, used for visualization.
        if logging:
            with open('data.txt', 'a') as fout:
                fout.write(', '.join([str(i) for i in actions]))
                fout.write('\n')
    else:
        # Recursive call to the selection function.
        val = self.selection(best_child, False, logging, actions)

    # Update this node's accumulated value and visit count.
    node.W += val
    node.N += 1

    '''
    The value for the current player positively correlates with the current
    player's winning chances. If the current player has value +1, it would
    likely win, which implies its children (and its parent, if it is not the
    root node), which belong to the opposing player, would have value -1.
    Hence -1 * val is passed back.
    '''
    return -val
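
# For context, the PUCT rule referenced above combines a value estimate (Q)
# with a prior-weighted exploration bonus (U). Below is a minimal sketch of
# what select_best_child might look like, assuming each child node stores a
# visit count N, an accumulated value W, and a prior probability P, and that
# node.children is a dict keyed by action tuples. The constant C_PUCT and the
# (ignored) root flag handling are assumptions, not taken from the gist.
import math

C_PUCT = 1.0  # hypothetical exploration constant

def select_best_child(self, node, root=False):
    # Total visit count over all children, used in the exploration (U) term.
    sqrt_total = math.sqrt(sum(child.N for child in node.children.values()))
    best_score, best_action, best_child = -float('inf'), None, None
    for action, child in node.children.items():
        # Q term: mean value from the parent's perspective; child values are
        # stored from the child player's perspective, hence the minus sign.
        q = -child.W / child.N if child.N > 0 else 0.0
        # U term: prior-weighted exploration bonus.
        u = C_PUCT * child.P * sqrt_total / (1 + child.N)
        score = q + u
        if score > best_score:
            best_score, best_action, best_child = score, action, child
    return best_child, best_action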