Skip to content

Instantly share code, notes, and snippets.

@widovanheemstra
Last active July 24, 2018 18:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save widovanheemstra/75ca9215c614009f846739fec9e31597 to your computer and use it in GitHub Desktop.
Save widovanheemstra/75ca9215c614009f846739fec9e31597 to your computer and use it in GitHub Desktop.
from sample_players import DataPlayer
class CustomPlayer(DataPlayer):
    """Agent that plays knight's Isolation with iterative-deepening alpha-beta.

    The get_action() method is the only required method for this project.
    You can modify the interface for get_action by adding named parameters
    with default values, but the function MUST remain compatible with the
    default interface.

    **********************************************************************
    NOTES:
    - The test cases will NOT be run on a machine with GPU access, nor be
      suitable for using any other machine learning techniques.
    - You can pass state forward to your agent on the next turn by assigning
      any pickleable object to the self.context attribute.
    **********************************************************************
    """

    def get_action(self, state):
        """Employ an adversarial search technique to choose an action
        available in the current state.

        This method must call self.queue.put(ACTION) at least once, and may
        call it as many times as you want; the caller will be responsible
        for cutting off the function after the search time limit has expired.

        See RandomPlayer and GreedyPlayer in sample_players for more examples.

        **********************************************************************
        NOTE:
        - The caller is responsible for cutting off search, so calling
          get_action() from your own code will create an infinite loop!
          Refer to (and use!) the Isolation.play() function to run games.
        **********************************************************************
        """
        if state.ply_count < 2:
            # Opening ply: search adds little value this early, so just
            # queue a random legal move (the timer is managed by the caller).
            import random
            self.queue.put(random.choice(state.actions()))
        else:
            # Iterative deepening: queue the best move found at each depth so
            # that when the caller cuts us off, the deepest completed search
            # result is already on the queue.
            for depth in range(1, 100):
                self.queue.put(self.alpha_beta_search(state, depth=depth))

    def alpha_beta_search(self, gameState, depth):
        """Return the move along a branch of the game tree that has the best
        possible value for the searching player.

        A move is a pair of coordinates in (column, row) order corresponding
        to a legal move for the searching player.

        Parameters
        ----------
        gameState : Isolation
            The current game state (must be non-terminal for a useful result).
        depth : int
            Maximum search depth; leaves at this depth are scored by heur4().

        Returns
        -------
        The best action found, or an arbitrary legal action when every move
        leads to a certain loss; None only if the state has no actions.
        """
        def min_value(gameState, depth, alpha, beta):
            """Return the terminal utility if the game is over, otherwise the
            minimum value over all legal child nodes (opponent to move).
            """
            # if CUTOFF_TEST(state, depth) then return EVAL(state)
            if gameState.terminal_test():
                return gameState.utility(self.player_id)
            if depth <= 0:
                return self.heur4(gameState)
            v = float("inf")
            for a in gameState.actions():
                v = min(v, max_value(gameState.result(a), depth - 1, alpha, beta))
                if v <= alpha:
                    return v  # prune: maximizer already has a better option
                beta = min(beta, v)
            return v

        def max_value(gameState, depth, alpha, beta):
            """Return the terminal utility if the game is over, otherwise the
            maximum value over all legal child nodes (searching player to move).
            """
            if gameState.terminal_test():
                return gameState.utility(self.player_id)
            if depth <= 0:
                return self.heur4(gameState)
            v = float("-inf")
            for a in gameState.actions():
                v = max(v, min_value(gameState.result(a), depth - 1, alpha, beta))
                if v >= beta:
                    return v  # prune: minimizer already has a better option
                alpha = max(alpha, v)
            return v

        alpha = float("-inf")
        beta = float("inf")
        best_score = float("-inf")
        best_move = None
        actions = gameState.actions()
        for a in actions:
            v = min_value(gameState.result(a), depth - 1, alpha, beta)
            alpha = max(alpha, v)
            if v > best_score:
                best_score = v
                best_move = a
        # BUGFIX: when every root move scores -inf (a provably lost position),
        # `v > best_score` never fires and best_move would stay None, making
        # the agent forfeit by queueing None. Fall back to any legal move.
        if best_move is None and actions:
            best_move = actions[0]
        return best_move

    # - Baseline heuristic: #my_moves - #opp_moves
    #   50% winning - 100 rounds, 100ms
    #   50% winning - 100 rounds, 300ms
    def baseline(self, gameState):
        """Score a state as own liberty count minus opponent liberty count."""
        own_loc = gameState.locs[self.player_id]
        opp_loc = gameState.locs[1 - self.player_id]
        own_liberties = gameState.liberties(own_loc)
        opp_liberties = gameState.liberties(opp_loc)
        return len(own_liberties) - len(opp_liberties)

    # - Select the moves with highest winning chance
    #   50% winning - 100 rounds, 100ms
    #   48% winning - 100 rounds, 300ms
    def heur1(self, gameState):
        """Like baseline, but weight the opponent's mobility 4x (aggressive)."""
        own_loc = gameState.locs[self.player_id]
        opp_loc = gameState.locs[1 - self.player_id]
        own_liberties = gameState.liberties(own_loc)
        opp_liberties = gameState.liberties(opp_loc)
        return len(own_liberties) - 4 * len(opp_liberties)

    # - Stay away from opponent
    #   31% winning - 100 rounds, 100ms
    #   33% winning - 100 rounds, 300ms
    def heur2(self, gameState):
        """Score a state as the squared board distance to the opponent.

        NOTE(review): this decodes board indices assuming a row width of 9;
        the knight's-Isolation board encoding may use a different row stride —
        TODO confirm against the Isolation board constants before relying on it.
        """
        own_loc_x = gameState.locs[self.player_id] % 9
        own_loc_y = gameState.locs[self.player_id] // 9
        opp_loc_x = gameState.locs[1 - self.player_id] % 9
        opp_loc_y = gameState.locs[1 - self.player_id] // 9
        return ((own_loc_x - opp_loc_x) ** 2 + (own_loc_y - opp_loc_y) ** 2)

    # - Limit opponent's moves
    #   23% winning - 100 rounds, 100ms
    #   24% winning - 100 rounds, 300ms
    def heur3(self, gameState):
        """Score a state by the number of liberties shared with the opponent."""
        own_loc = gameState.locs[self.player_id]
        opp_loc = gameState.locs[1 - self.player_id]
        own_liberties = set(gameState.liberties(own_loc))
        opp_liberties = set(gameState.liberties(opp_loc))
        return len(own_liberties.intersection(opp_liberties))

    # - Select the moves with highest winning chance
    #   48% winning - 100 rounds, 100ms
    #    % winning - 100 rounds, 300ms
    def heur4(self, gameState):
        """Score a state as own liberties squared minus 2x opponent liberties
        squared (used as the leaf evaluation in alpha_beta_search)."""
        own_loc = gameState.locs[self.player_id]
        opp_loc = gameState.locs[1 - self.player_id]
        own_liberties = gameState.liberties(own_loc)
        opp_liberties = gameState.liberties(opp_loc)
        return len(own_liberties) ** 2 - 2 * len(opp_liberties) ** 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment