@vinczebalazs
Created January 29, 2021 10:17
#!/usr/bin/env python
"""
A basic adaptive bot. This is part of the third worksheet.
"""
from api import State, util
import random, os
from itertools import chain
import joblib
# Path of the model we will use. If you make a model
# with a different name, point this line to its path.
DEFAULT_MODEL = os.path.abspath("models/m_rand_g50000_f1_MLPC.pkl")

class Bot:

    __max_depth = -1
    __randomize = True
    __model = None

    def __init__(self, randomize=True, model_file=DEFAULT_MODEL, depth=8):
        self.__randomize = randomize
        # Remember how deep alpha-beta may search before falling back to the heuristic
        self.__max_depth = depth
        # Load the model
        self.__model = joblib.load(model_file)

    def get_move(self, state):
        # Phase 1: play the move the learned model rates best.
        # Phase 2: fall back to alpha-beta search.
        if state.get_phase() == 1:
            return self.ml_value(state)[1]
        else:
            return self.alphabeta_value(state)[1]

    def ml_value(self, state):
        """
        Return the value of this state and the associated move
        :param state:
        :return: val, move: the value of the state, and the best move.
        """
        best_value = float('-inf') if maximizing(state) else float('inf')
        best_move = None

        moves = state.moves()
        if self.__randomize:
            random.shuffle(moves)

        for move in moves:
            next_state = state.next(move)
            value = self.ml_heuristic(next_state)

            if maximizing(state):
                if value > best_value:
                    best_value = value
                    best_move = move
            else:
                if value < best_value:
                    best_value = value
                    best_move = move

        return best_value, best_move

    def ml_heuristic(self, state):
        """
        Heuristic value of a state according to the learned model:
        the probability of 'won' minus the probability of 'lost', in [-1, 1].
        """
        feature_vector = [features(state)]
        classes = list(self.__model.classes_)
        prob = self.__model.predict_proba(feature_vector)[0]
        # Weigh the win/loss outcomes (-1 and 1) by their probabilities
        res = -1.0 * prob[classes.index('lost')] + 1.0 * prob[classes.index('won')]
        return res

    def alphabeta_value(self, state, alpha=float('-inf'), beta=float('inf'), depth=0):
        """
        Return the value of this state and the associated move
        :param State state:
        :param float alpha: The highest score that the maximizing player can guarantee given current knowledge
        :param float beta: The lowest score that the minimizing player can guarantee given current knowledge
        :param int depth: How deep we are in the tree
        :return val, move: the value of the state, and the best move.
        """
        if state.finished():
            winner, points = state.winner()
            return (points, None) if winner == 1 else (-points, None)

        if depth == self.__max_depth:
            # Cut the search off and fall back to the learned heuristic.
            # Return a (value, move) pair so callers can unpack it like every other return.
            return self.ml_heuristic(state), None

        best_value = float('-inf') if maximizing(state) else float('inf')
        best_move = None

        moves = state.moves()
        if self.__randomize:
            random.shuffle(moves)

        for move in moves:
            next_state = state.next(move)
            value, _ = self.alphabeta_value(next_state, alpha, beta, depth + 1)

            if maximizing(state):
                if value > best_value:
                    best_value = value
                    best_move = move
                    alpha = best_value
            else:
                if value < best_value:
                    best_value = value
                    best_move = move
                    beta = best_value

            # Prune the search tree
            if maximizing(state):
                if best_value > beta:
                    break
            else:
                if best_value < alpha:
                    break

        return best_value, best_move
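
# ------------------------------------------------------------------------------
# Illustrative sketch (not used by the bot): how ml_heuristic's score comes out
# of a scikit-learn style classifier. The stub below only mimics the two
# attributes the bot relies on (`classes_` and `predict_proba`); its name and
# the hard-coded probabilities are made up purely for this example.
class _StubModel:
    classes_ = ['lost', 'won']

    def predict_proba(self, feature_vectors):
        # Pretend every state is judged 70% likely to end in a win.
        return [[0.3, 0.7] for _ in feature_vectors]


def _heuristic_example():
    model = _StubModel()
    classes = list(model.classes_)
    prob = model.predict_proba([[0.0] * 5])[0]
    # Same weighting as Bot.ml_heuristic: -1.0 * P(lost) + 1.0 * P(won) = -0.3 + 0.7 = 0.4
    return -1.0 * prob[classes.index('lost')] + 1.0 * prob[classes.index('won')]
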

def maximizing(state):
    """
    Whether we're the maximizing player (1) or the minimizing player (2).
    :param state:
    :return:
    """
    return state.whose_turn() == 1

def features(state):
    # type: (State) -> list[float]
    """
    Extract features from this state. Remember that every feature vector returned should have the same length.
    :param state: A state to be converted to a feature vector
    :return: A list of floats: a feature vector representing this state.
    """
    feature_set = []

    # Add player 1's points to feature set
    p1_points = state.get_points(1)
    # Add player 2's points to feature set
    p2_points = state.get_points(2)

    # Add player 1's pending points to feature set
    p1_pending_points = state.get_pending_points(1)
    # Add player 2's pending points to feature set
    p2_pending_points = state.get_pending_points(2)

    # Get trump suit
    trump_suit = state.get_trump_suit()

    # Add phase to feature set
    phase = state.get_phase()

    # Add stock size to feature set
    stock_size = state.get_stock_size()

    # Add leader to feature set
    leader = state.leader()

    # Add whose turn it is to feature set
    whose_turn = state.whose_turn()

    # Add opponent's played card to feature set
    opponents_played_card = state.get_opponents_played_card()

    ################## You do not need to do anything below this line ########################

    perspective = state.get_perspective()

    # Perform one-hot encoding on the perspective.
    # Learn more about one-hot here: https://machinelearningmastery.com/how-to-one-hot-encode-sequence-data-in-python/
    perspective = [card if card != 'U' else [1, 0, 0, 0, 0, 0] for card in perspective]
    perspective = [card if card != 'S' else [0, 1, 0, 0, 0, 0] for card in perspective]
    perspective = [card if card != 'P1H' else [0, 0, 1, 0, 0, 0] for card in perspective]
    perspective = [card if card != 'P2H' else [0, 0, 0, 1, 0, 0] for card in perspective]
    perspective = [card if card != 'P1W' else [0, 0, 0, 0, 1, 0] for card in perspective]
    perspective = [card if card != 'P2W' else [0, 0, 0, 0, 0, 1] for card in perspective]

    # Append one-hot encoded perspective to feature_set
    feature_set += list(chain(*perspective))

    # Append normalized points to feature_set
    total_points = p1_points + p2_points
    feature_set.append(p1_points / total_points if total_points > 0 else 0.)
    feature_set.append(p2_points / total_points if total_points > 0 else 0.)

    # Append normalized pending points to feature_set
    total_pending_points = p1_pending_points + p2_pending_points
    feature_set.append(p1_pending_points / total_pending_points if total_pending_points > 0 else 0.)
    feature_set.append(p2_pending_points / total_pending_points if total_pending_points > 0 else 0.)

    # One-hot encode the trump suit and add it to the feature set
    # You don't need to add anything to this part
    suits = ["C", "D", "H", "S"]
    trump_suit_onehot = [0, 0, 0, 0]
    trump_suit_onehot[suits.index(trump_suit)] = 1
    feature_set += trump_suit_onehot

    # Append one-hot encoded phase to feature set
    feature_set += [1, 0] if phase == 1 else [0, 1]

    # Append normalized stock size to feature set
    feature_set.append(stock_size / 10)

    # Append one-hot encoded leader to feature set
    feature_set += [1, 0] if leader == 1 else [0, 1]

    # Append one-hot encoded whose_turn to feature set
    feature_set += [1, 0] if whose_turn == 1 else [0, 1]

    # Append one-hot encoded opponent's card to feature set (21 slots: 20 cards plus "no card played yet")
    opponents_played_card_onehot = [0] * 21
    opponents_played_card_onehot[opponents_played_card if opponents_played_card is not None else 20] = 1
    feature_set += opponents_played_card_onehot

    # Return feature set
    return feature_set
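
# ------------------------------------------------------------------------------
# Illustrative sketches (not part of the worksheet code).
#
# A minimal sketch of how a model file such as DEFAULT_MODEL could be produced:
# fit a scikit-learn MLPClassifier on feature vectors labelled 'won'/'lost' and
# serialise it with joblib. The real training script belongs to the course
# framework; the random toy data, the classifier settings and the output path
# below are assumptions made purely for illustration (real training data would
# be features(state) vectors collected from played games).
def _train_toy_model(path="models/m_toy_MLPC.pkl"):
    from sklearn.neural_network import MLPClassifier

    # Toy dataset: 200 made-up feature vectors with made-up outcomes.
    data = [[random.random() for _ in range(10)] for _ in range(200)]
    target = [random.choice(['won', 'lost']) for _ in range(200)]

    learner = MLPClassifier(hidden_layer_sizes=(64,), max_iter=500)
    model = learner.fit(data, target)

    # Bot.__init__ loads the model back with joblib.load(model_file).
    os.makedirs(os.path.dirname(path), exist_ok=True)
    joblib.dump(model, path)
    return path


# A minimal usage sketch, assuming the course framework offers State.generate()
# to create a fresh game (as its play/tournament scripts do); adapt this to
# however states are created in your setup.
def _demo_move():
    state = State.generate()
    bot = Bot()
    return bot.get_move(state)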