Last active
October 26, 2023 11:59
-
-
Save shahradj/644c0281556cfc5701fb3c32b869ac73 to your computer and use it in GitHub Desktop.
CFR implementation of five card stud
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
from poker import POKER_BOX, Hand, Card | |
#poker.py can be found at https://gist.github.com/shahradj/1761a6a521c2649bdcc54996d2bb73ec | |
from gametree import GameTree, ACTIONS | |
#gametree.py can be found at https://gist.github.com/shahradj/ab6e2ea5ab8cb17ed287dda0c4796ed2 | |
import numpy as np | |
import pandas as pd | |
import itertools | |
import copy | |
from collections import Counter | |
''' | |
Use conterfactual regret minimisation algorithm to play Five card stud Poker | |
Counterfactual regret minimization uses the regret-matching algorithm. In addition, | |
(1)one must additionally factor in the probabilities of reaching each information set given the players' | |
strategies, and (2) given that the game is treated sequentially through a sequence of information sets, | |
there is a passing forward of game state information and probabilities of player action sequences, and | |
a passing backward of utility information through these information sets | |
''' | |
class PokerGame: | |
def __init__(self): | |
random.seed() | |
self.game_tree = GameTree() | |
def __str__(self): | |
return 'history={};ante={};pot={};shared_cards={}'.format(self.history, self.ante, self.pot, self.shared_cards) | |
def reset(self, *players): | |
random.shuffle(POKER_BOX) | |
self.poker_box = copy.copy(POKER_BOX) | |
def deal(self, n=1): | |
return [self.poker_box.pop() for i in range(n)] | |
def new_game(self, n_players=2): | |
self.reset() | |
cards = {i:[] for i in range(n_players)} | |
for i in range(5*n_players): | |
cards[i%n_players] += self.deal() | |
return list(cards.values()) | |
def calculate_pot(self, history, pot=50, bet=20): | |
"""calculate the pot for a given history of a two player game | |
""" | |
call_needed = False | |
for char in history: | |
if char == 'c' and call_needed: | |
pot += bet | |
if char == 'r': | |
if call_needed: | |
pot += bet | |
call_needed = True | |
pot += bet | |
if char == 'f': | |
break | |
return pot | |
def end(self, history, cards, verbose=False): | |
"""calculate the reward and pot of a two player game | |
""" | |
def best_hand(cards): | |
# not efficient but for the sake of readability... | |
all_hands = list(itertools.permutations(cards , 5)) | |
# all_hands = itertools.permutations(cards + self.shared_cards, 5) | |
scores = [Hand(hand).get_score() for hand in all_hands] | |
return max(scores),Hand(all_hands[scores.index(max(scores))]) | |
p1_cards, p2_cards = cards | |
pot = self.calculate_pot(history) | |
# case 1: someone has folded | |
if 'f' in history: | |
folding_player = history.index('f') | |
winning_player = (folding_player + 1)%2 | |
return (winning_player, pot) | |
# case 2: entered show-down stage, needs to evaluate hands | |
p1_best, p1_hand = best_hand(p1_cards) | |
p2_best, p2_hand = best_hand(p2_cards) | |
if verbose: | |
print('P1:{}, P2:{}'.format(p1_best, p2_best)) | |
# winner gets all | |
if p1_best > p2_best: | |
return 0, pot | |
elif p1_best < p2_best: | |
return 1, pot | |
else: | |
#when both hands are same, consider values of the cards | |
p1vals = list(pd.Series(Counter(p1_hand.values)).sort_values().index) | |
p2vals = list(pd.Series(Counter(p2_hand.values)).sort_values().index) | |
for i in range(len(p1vals)): | |
if p1vals[-i] > p2vals[-i]: | |
return 0, pot | |
if p2vals[-i] > p1vals[-i]: | |
return 1, pot | |
return 0.5, pot/2 # players share the pots | |
def cfr(self, cards, history, p1_prob, p2_prob, max_actions=4): | |
''' | |
returns the utility of the current node, which is determined by the history and the cards. | |
:param cards: p1 gets cards[0] and p2 gets cards[1] | |
:param history: sequence of players' actions | |
:param p1_prob: probability that player1's strategy can reach this history | |
:param p2_prob: probability that player2's strategy can reach this history | |
:param max_actions: integer max number of actions of history | |
:return: | |
''' | |
# decide who is playing | |
plays = len(history) | |
player = plays % 2 | |
opp = 1 - player | |
if plays > 1: | |
# check whether game has terminated. | |
if len(history) >= max_actions or 'f' in history or (len(history) > 1 and history[-1]=='c'): | |
winner, utility = self.end(history, cards) | |
return utility if not winner or winner is player else -utility | |
# get/create the node corresponding to the info set | |
info_set = ''.join(sorted(cards[player], reverse=True)) + history | |
node = self.game_tree[info_set] | |
# use regret matching to update strategy | |
strategy = node.update_strategy(p2_prob if player == 0 else p1_prob) | |
# for each action, recursively call cfr with additional history and corresponding player probabilities | |
action_utils = np.zeros(len(ACTIONS)) | |
for i, action in enumerate(ACTIONS): | |
next_history = history + action | |
res = self.cfr(cards, next_history, p1_prob * strategy[i], p2_prob) if player == 0 else \ | |
self.cfr(cards, next_history, p1_prob, p2_prob * strategy[i]) | |
action_utils[i] = -res | |
# for each action, compute and accumulate counterfactual regret | |
# weighted by the probability that the opponent plays to the current information set, | |
node_util = sum(action_utils * strategy) | |
regret = action_utils - node_util | |
node.regret_sum += regret * (p2_prob if player == 0 else p1_prob) | |
return node_util | |
def start(self, games): | |
"""run cfr for a set number of games and print the outcome of the game tree afterwards""" | |
for g in range(games): | |
cards = self.new_game() | |
self.cfr(cards, '', 1, 1) | |
print(self.game_tree) | |
def same_game(self, games): | |
"""run CFR over many games with the same deal of cards to see how the nash equilibrium converges for that set of hands""" | |
cards = self.new_game() | |
for g in range(games): | |
self.cfr(cards, '', 1, 1) | |
print(self.game_tree) | |
if __name__ == '__main__': | |
PokerGame().start(1000) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
For some reason I get gametree node values for all rounds [0.33333, 0.333333, 0.33333].
Do you know what might the the problem