Skip to content

Instantly share code, notes, and snippets.

@widovanheemstra
Last active July 24, 2018 18:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save widovanheemstra/75ca9215c614009f846739fec9e31597 to your computer and use it in GitHub Desktop.
Save widovanheemstra/75ca9215c614009f846739fec9e31597 to your computer and use it in GitHub Desktop.
from sample_players import DataPlayer
class CustomPlayer(DataPlayer):
    """Agent that plays knight's Isolation with iterative-deepening alpha-beta.

    The get_action() method is the only required method for this project.
    You can modify the interface for get_action by adding named parameters
    with default values, but the function MUST remain compatible with the
    default interface.

    **********************************************************************
    NOTES:
    - The test cases will NOT be run on a machine with GPU access, nor be
      suitable for using any other machine learning techniques.
    - You can pass state forward to your agent on the next turn by assigning
      any pickleable object to the self.context attribute.
    **********************************************************************
    """

    def get_action(self, state):
        """Employ an adversarial search technique to choose an action
        available in the current state.

        This method must call self.queue.put(ACTION) at least once, and may
        call it as many times as you want; the caller will be responsible
        for cutting off the function after the search time limit has expired.

        See RandomPlayer and GreedyPlayer in sample_players for more examples.

        **********************************************************************
        NOTE:
        - The caller is responsible for cutting off search, so calling
          get_action() from your own code will create an infinite loop!
          Refer to (and use!) the Isolation.play() function to run games.
        **********************************************************************
        """
        if state.ply_count < 2:
            # Opening ply: search adds little value this early, so just
            # queue a random legal move (the timer is managed by the caller).
            import random
            self.queue.put(random.choice(state.actions()))
        else:
            # Iterative deepening: queue the best move found at each depth so
            # that when the caller cuts us off, the deepest completed search
            # result is already on the queue.
            for depth in range(1, 100):
                self.queue.put(self.alpha_beta_search(state, depth=depth))

    def alpha_beta_search(self, gameState, depth):
        """Return the move along a branch of the game tree that has the best
        possible value for the searching player.

        A move is a pair of coordinates in (column, row) order corresponding
        to a legal move for the searching player.

        Parameters
        ----------
        gameState : Isolation
            The current game state (must be non-terminal for a useful result).
        depth : int
            Maximum search depth; leaves at this depth are scored by heur4().

        Returns
        -------
        The best action found, or an arbitrary legal action when every move
        leads to a certain loss; None only if the state has no actions.
        """
        def min_value(gameState, depth, alpha, beta):
            """Return the terminal utility if the game is over, otherwise the
            minimum value over all legal child nodes (opponent to move).
            """
            # if CUTOFF_TEST(state, depth) then return EVAL(state)
            if gameState.terminal_test():
                return gameState.utility(self.player_id)
            if depth <= 0:
                return self.heur4(gameState)
            v = float("inf")
            for a in gameState.actions():
                v = min(v, max_value(gameState.result(a), depth - 1, alpha, beta))
                if v <= alpha:
                    return v  # prune: maximizer already has a better option
                beta = min(beta, v)
            return v

        def max_value(gameState, depth, alpha, beta):
            """Return the terminal utility if the game is over, otherwise the
            maximum value over all legal child nodes (searching player to move).
            """
            if gameState.terminal_test():
                return gameState.utility(self.player_id)
            if depth <= 0:
                return self.heur4(gameState)
            v = float("-inf")
            for a in gameState.actions():
                v = max(v, min_value(gameState.result(a), depth - 1, alpha, beta))
                if v >= beta:
                    return v  # prune: minimizer already has a better option
                alpha = max(alpha, v)
            return v

        alpha = float("-inf")
        beta = float("inf")
        best_score = float("-inf")
        best_move = None
        actions = gameState.actions()
        for a in actions:
            v = min_value(gameState.result(a), depth - 1, alpha, beta)
            alpha = max(alpha, v)
            if v > best_score:
                best_score = v
                best_move = a
        # BUGFIX: when every root move scores -inf (a provably lost position),
        # `v > best_score` never fires and best_move would stay None, making
        # the agent forfeit by queueing None. Fall back to any legal move.
        if best_move is None and actions:
            best_move = actions[0]
        return best_move

    # - Baseline heuristic: #my_moves - #opp_moves
    #   50% winning - 100 rounds, 100ms
    #   50% winning - 100 rounds, 300ms
    def baseline(self, gameState):
        """Score a state as own liberty count minus opponent liberty count."""
        own_loc = gameState.locs[self.player_id]
        opp_loc = gameState.locs[1 - self.player_id]
        own_liberties = gameState.liberties(own_loc)
        opp_liberties = gameState.liberties(opp_loc)
        return len(own_liberties) - len(opp_liberties)

    # - Select the moves with highest winning chance
    #   50% winning - 100 rounds, 100ms
    #   48% winning - 100 rounds, 300ms
    def heur1(self, gameState):
        """Like baseline, but weight the opponent's mobility 4x (aggressive)."""
        own_loc = gameState.locs[self.player_id]
        opp_loc = gameState.locs[1 - self.player_id]
        own_liberties = gameState.liberties(own_loc)
        opp_liberties = gameState.liberties(opp_loc)
        return len(own_liberties) - 4 * len(opp_liberties)

    # - Stay away from opponent
    #   31% winning - 100 rounds, 100ms
    #   33% winning - 100 rounds, 300ms
    def heur2(self, gameState):
        """Score a state as the squared board distance to the opponent.

        NOTE(review): this decodes board indices assuming a row width of 9;
        the knight's-Isolation board encoding may use a different row stride —
        TODO confirm against the Isolation board constants before relying on it.
        """
        own_loc_x = gameState.locs[self.player_id] % 9
        own_loc_y = gameState.locs[self.player_id] // 9
        opp_loc_x = gameState.locs[1 - self.player_id] % 9
        opp_loc_y = gameState.locs[1 - self.player_id] // 9
        return ((own_loc_x - opp_loc_x) ** 2 + (own_loc_y - opp_loc_y) ** 2)

    # - Limit opponent's moves
    #   23% winning - 100 rounds, 100ms
    #   24% winning - 100 rounds, 300ms
    def heur3(self, gameState):
        """Score a state by the number of liberties shared with the opponent."""
        own_loc = gameState.locs[self.player_id]
        opp_loc = gameState.locs[1 - self.player_id]
        own_liberties = set(gameState.liberties(own_loc))
        opp_liberties = set(gameState.liberties(opp_loc))
        return len(own_liberties.intersection(opp_liberties))

    # - Select the moves with highest winning chance
    #   48% winning - 100 rounds, 100ms
    #    % winning - 100 rounds, 300ms
    def heur4(self, gameState):
        """Score a state as own liberties squared minus 2x opponent liberties
        squared (used as the leaf evaluation in alpha_beta_search)."""
        own_loc = gameState.locs[self.player_id]
        opp_loc = gameState.locs[1 - self.player_id]
        own_liberties = gameState.liberties(own_loc)
        opp_liberties = gameState.liberties(opp_loc)
        return len(own_liberties) ** 2 - 2 * len(opp_liberties) ** 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment