MP-Mix Adversarial Search (Python)
from math import inf

# MAX_DEPTH, N_PLAYERS, PLAYER_HASH, State, desperation, runner, possible_actions,
# apply_action and Z_hash are assumed to be defined elsewhere in the project.

def directed_offensive(state, counts, heuristic, max_player, target, min_eval=inf, depth_left=MAX_DEPTH):
    """
    :summary: An algorithm aimed at MINIMISING a target player, used in a 3-player
              scenario where no good pruning techniques are possible.
    :assumption: all players wish to maximise themselves (as in a typical Max^n algorithm).
    :strategy: if we find an evaluation that lowers the target's evaluation
               and is not worse for us, that action becomes our "best action".
    :returns: tuple of (evaluation, action_if_any)
    """
    if not depth_left:
        evals = heuristic(state)
        # Value should be offset by the situation the target is in
        evals[PLAYER_HASH[max_player]] -= desperation(state)[PLAYER_HASH[target]]
        return (evals, None)
    max_player_evals = [-inf] * N_PLAYERS
    best_action, best_new_action = None, None
    player = state.turn
    index = PLAYER_HASH[player]
    target_index = PLAYER_HASH[target]
    generated_actions = state.possible_actions(player, sort=(player == max_player))
    for action in generated_actions:
        new_state = State.apply_action(state, action)
        # Get vector of evaluations
        player_eval = directed_offensive(new_state, counts, heuristic, max_player, target, min_eval, depth_left - 1)[0]
        if player != max_player:
            # NOT the max_player - we assume they will simply maximise themselves
            if player_eval[index] > max_player_evals[index]:
                max_player_evals, best_action = player_eval, action
                if new_state.hash not in counts:
                    best_new_action = best_action
        else:
            # If this evaluation LOWERS our target's evaluation and ours is NOT WORSE,
            # update our path with this action
            if player_eval[target_index] < min_eval and player_eval[index] >= max_player_evals[index]:
                max_player_evals, best_action, min_eval = player_eval, action, player_eval[target_index]
                if new_state.hash not in counts:
                    best_new_action = best_action
    return (player_eval, best_new_action) if best_new_action is not None else (player_eval, best_action)
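
# --- Usage sketch (not part of the original gist) ---------------------------
# A hedged example of how a root caller might pick the target for
# directed_offensive: aim at the opponent whose heuristic evaluation is
# currently highest. choose_offensive_action and the `opponents` list are
# illustrative assumptions; heuristic and PLAYER_HASH are the project's names.
def choose_offensive_action(root_state, counts, heuristic, max_player, opponents):
    root_evals = heuristic(root_state)
    # Target the opponent who is currently doing best according to the heuristic.
    target = max(opponents, key=lambda p: root_evals[PLAYER_HASH[p]])
    _, action = directed_offensive(root_state, counts, heuristic, max_player, target)
    return action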
def paranoid(state, counts, heuristic, max_player, alpha=-inf, beta=inf, depth_left=MAX_DEPTH, loser=False):
    """
    :summary: Paranoid search assuming a 1-vs-rest scenario.
              Used when winning / losing by a certain threshold.
              The implementation also uses alpha-beta pruning, assuming good move ordering.
    :returns: tuple of (evaluation, action_if_any)
    """
    if not depth_left:
        evals = heuristic(state)
        if loser:
            evals[PLAYER_HASH[max_player]] += desperation(state)[PLAYER_HASH[max_player]]
        return (evals, None)
    best_action, best_new_action = None, None
    player = state.turn
    index = PLAYER_HASH[player]
    generated_actions = state.possible_actions(player)
    for action in generated_actions:
        new_state = State.apply_action(state, action)
        # Only want the vector of evaluations
        player_eval = paranoid(new_state, counts, heuristic, max_player, alpha, beta, depth_left - 1)[0]
        if player == max_player:
            # MaximisingPlayer wants to maximise alpha
            if player_eval[index] > alpha:
                alpha, best_action = player_eval[index], action
                if new_state.hash not in counts:
                    best_new_action = best_action
        else:
            # MinimisingPlayer wants to lower beta
            if player_eval[index] < beta:
                beta, best_action = player_eval[index], action
                if new_state.hash not in counts:
                    best_new_action = best_action
        if alpha >= beta:
            return (player_eval, best_new_action) if best_new_action is not None else (player_eval, best_action)
    return (player_eval, best_new_action) if best_new_action is not None else (player_eval, best_action)
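
# --- Usage sketch (not part of the original gist) ---------------------------
# A minimal example of invoking paranoid from the root. choose_paranoid_action
# and the `losing` flag are illustrative assumptions. The `counts` dictionary
# appears to hold hashes of already-visited states, which is why the search
# prefers a `best_new_action` whose hash is not yet in it.
def choose_paranoid_action(root_state, counts, heuristic, max_player, losing=False):
    _, action = paranoid(root_state, counts, heuristic, max_player,
                         depth_left=MAX_DEPTH, loser=losing)
    return action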
def alpha_beta(state, counts, heuristic, max_player, alpha=-inf, beta=inf, depth_left=MAX_DEPTH):
    """
    :summary: Simple yet effective implementation of minimax with alpha-beta pruning.
    :returns: tuple of (evaluation, action_if_any)
    """
    if not depth_left:
        evals = heuristic(state)
        return (evals, None)
    best_action, best_new_action = None, None
    player = state.turn
    index = PLAYER_HASH[player]
    generated_actions = state.possible_actions(player)
    for action in generated_actions:
        new_state = State.apply_action(state, action, ignore_dead=True)
        # Only want the vector of evaluations
        player_eval = alpha_beta(new_state, counts, heuristic, max_player, alpha, beta, depth_left - 1)[0]
        if player == max_player:
            # MaximisingPlayer wants to maximise alpha
            if player_eval[index] > alpha:
                alpha, best_action = player_eval[index], action
                if new_state.hash not in counts:
                    best_new_action = best_action
        else:
            # MinimisingPlayer wants to lower beta
            if player_eval[index] < beta:
                beta, best_action = player_eval[index], action
                if new_state.hash not in counts:
                    best_new_action = best_action
        if alpha >= beta:
            return (player_eval, best_new_action) if best_new_action is not None else (player_eval, best_action)
    return (player_eval, best_new_action) if best_new_action is not None else (player_eval, best_action)
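
# --- Move-ordering sketch (not part of the original gist) -------------------
# The paranoid / alpha_beta / negamax docstrings rely on actions arriving in a
# good order for pruning. One common way to achieve this (an assumption here,
# not necessarily the project's actual ordering) is a shallow one-ply sort:
# evaluate each child once and explore the most promising children first.
def order_actions(state, heuristic, player):
    index = PLAYER_HASH[player]
    actions = state.possible_actions(player)
    # Sort descending by the acting player's own heuristic component.
    return sorted(actions,
                  key=lambda a: heuristic(State.apply_action(state, a))[index],
                  reverse=True)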
def max_n(state, counts, heuristic, max_player_evals=[-inf] * N_PLAYERS, depth_left=MAX_DEPTH):
    """
    :summary: Max^n - a 3-player variant of minimax with no good pruning techniques available.
              Fairly weak (due to the limited pruning) and rarely used in game.
    :returns: tuple of (evaluation, action_if_any)
    """
    if not depth_left:
        evals = heuristic(state)
        return (evals, None)
    best_action, best_new_action = None, None
    player = state.turn
    index = PLAYER_HASH[player]
    generated_actions = state.possible_actions(player)
    for action in generated_actions:
        new_state = State.apply_action(state, action)
        # Get vector of evaluations
        player_eval = max_n(new_state, counts, heuristic, max_player_evals, depth_left - 1)[0]
        if player_eval[index] > max_player_evals[index]:
            max_player_evals, best_action = player_eval, action
            if new_state.hash not in counts:
                best_new_action = best_action
    return (player_eval, best_new_action) if best_new_action is not None else (player_eval, best_action)
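
# --- Usage sketch (not part of the original gist) ---------------------------
# Calling max_n from the root. choose_max_n_action is an illustrative helper;
# the default `max_player_evals` list above is built once at definition time,
# so passing an explicit fresh [-inf] vector here avoids relying on that
# shared default between searches.
def choose_max_n_action(root_state, counts, heuristic):
    _, action = max_n(root_state, counts, heuristic,
                      max_player_evals=[-inf] * N_PLAYERS,
                      depth_left=MAX_DEPTH)
    return action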
########################### ALPHA-BETA ALTERNATIVE ##########################

def negamax(state, counts, heuristic, max_player, alpha=-inf, beta=inf, depth_left=MAX_DEPTH):
    """
    :summary: Simple yet effective implementation of Negamax with alpha-beta pruning.
              The possible-actions function always returns a good ordering for optimal pruning.
    :assumption: one player is dead - hence the game is effectively 2-player.
    :returns: tuple of (evaluation, action_if_any)
    """
    if not depth_left:
        evals = runner(state)[PLAYER_HASH[max_player]]
        return (-evals, None)
    best_action, best_new_action = None, None
    player = state["turn"]
    for action in possible_actions(state, player):
        new_state = apply_action(state, action, ignore_dead=True)
        new_eval = -negamax(new_state, counts, heuristic, max_player, -beta, -alpha, depth_left - 1)[0]
        if new_eval >= beta:
            return (beta, best_new_action) if best_new_action is not None else (beta, best_action)
        if new_eval > alpha:
            alpha, best_action = new_eval, action
            if Z_hash(new_state) not in counts:
                best_new_action = best_action
    return (alpha, best_new_action) if best_new_action is not None else (alpha, best_action)
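
# --- MP-Mix driver sketch (not part of the original gist) -------------------
# The searches above are the sub-strategies of the MP-Mix scheme
# (Zuckerman, Felner & Kraus): at the root, measure how far ahead the current
# leader is and pick a search accordingly. This driver, the `opponents` list
# and the DEFENSIVE_THRESHOLD / OFFENSIVE_THRESHOLD values are illustrative
# assumptions, not the gist author's code.
DEFENSIVE_THRESHOLD = 1.0
OFFENSIVE_THRESHOLD = 1.0

def mp_mix(root_state, counts, heuristic, max_player, opponents):
    root_evals = heuristic(root_state)
    scores = {p: root_evals[PLAYER_HASH[p]] for p in [max_player] + opponents}
    leader = max(scores, key=scores.get)
    # Leading edge: leader's score minus the best of the remaining players.
    edge = scores[leader] - max(v for p, v in scores.items() if p != leader)
    if leader == max_player and edge >= DEFENSIVE_THRESHOLD:
        # We are clearly ahead: assume the others gang up on us (paranoid).
        return paranoid(root_state, counts, heuristic, max_player)[1]
    if leader != max_player and edge >= OFFENSIVE_THRESHOLD:
        # Someone else is running away with the game: gang up on them.
        return directed_offensive(root_state, counts, heuristic,
                                  max_player, target=leader)[1]
    # No clear leader: fall back to plain Max^n.
    return max_n(root_state, counts, heuristic,
                 max_player_evals=[-inf] * N_PLAYERS)[1]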