Instantly share code, notes, and snippets.

# shahradj/cfr.py

Last active Jul 26, 2019
CFR implementation of five card stud
 import random from poker import POKER_BOX, Hand, Card #poker.py can be found at https://gist.github.com/shahradj/1761a6a521c2649bdcc54996d2bb73ec from gametree import GameTree, ACTIONS #gametree.py can be found at https://gist.github.com/shahradj/ab6e2ea5ab8cb17ed287dda0c4796ed2 import numpy as np import pandas as pd import itertools import copy from collections import Counter ''' Use conterfactual regret minimisation algorithm to play Five card stud Poker Counterfactual regret minimization uses the regret-matching algorithm. In addition, (1)one must additionally factor in the probabilities of reaching each information set given the players' strategies, and (2) given that the game is treated sequentially through a sequence of information sets, there is a passing forward of game state information and probabilities of player action sequences, and a passing backward of utility information through these information sets ''' class PokerGame: def __init__(self): random.seed() self.game_tree = GameTree() def __str__(self): return 'history={};ante={};pot={};shared_cards={}'.format(self.history, self.ante, self.pot, self.shared_cards) def reset(self, *players): random.shuffle(POKER_BOX) self.poker_box = copy.copy(POKER_BOX) def deal(self, n=1): return [self.poker_box.pop() for i in range(n)] def new_game(self, n_players=2): self.reset() cards = {i:[] for i in range(n_players)} for i in range(5*n_players): cards[i%n_players] += self.deal() return list(cards.values()) def calculate_pot(self, history, pot=50, bet=20): """calculate the pot for a given history of a two player game """ call_needed = False for char in history: if char == 'c' and call_needed: pot += bet if char == 'r': if call_needed: pot += bet call_needed = True pot += bet if char == 'f': break return pot def end(self, history, cards, verbose=False): """calculate the reward and pot of a two player game """ def best_hand(cards): # not efficient but for the sake of readability... all_hands = list(itertools.permutations(cards , 5)) # all_hands = itertools.permutations(cards + self.shared_cards, 5) scores = [Hand(hand).get_score() for hand in all_hands] return max(scores),Hand(all_hands[scores.index(max(scores))]) p1_cards, p2_cards = cards pot = self.calculate_pot(history) # case 1: someone has folded if 'f' in history: folding_player = history.index('f') winning_player = (folding_player + 1)%2 return (winning_player, pot) # case 2: entered show-down stage, needs to evaluate hands p1_best, p1_hand = best_hand(p1_cards) p2_best, p2_hand = best_hand(p2_cards) if verbose: print('P1:{}, P2:{}'.format(p1_best, p2_best)) # winner gets all if p1_best > p2_best: return 0, pot elif p1_best < p2_best: return 1, pot else: #when both hands are same, consider values of the cards p1vals = list(pd.Series(Counter(p1_hand.values)).sort_values().index) p2vals = list(pd.Series(Counter(p2_hand.values)).sort_values().index) for i in range(len(p1vals)): if p1vals[-i] > p2vals[-i]: return 0, pot if p2vals[-i] > p1vals[-i]: return 1, pot return 0.5, pot/2 # players share the pots def cfr(self, cards, history, p1_prob, p2_prob, max_actions=4): ''' returns the utility of the current node, which is determined by the history and the cards. :param cards: p1 gets cards and p2 gets cards :param history: sequence of players' actions :param p1_prob: probability that player1's strategy can reach this history :param p2_prob: probability that player2's strategy can reach this history :param max_actions: integer max number of actions of history :return: ''' # decide who is playing plays = len(history) player = plays % 2 opp = 1 - player if plays > 1: # check whether game has terminated. if len(history) >= max_actions or 'f' in history or (len(history) > 1 and history[-1]=='c'): winner, utility = self.end(history, cards) return utility if not winner or winner is player else -utility # get/create the node corresponding to the info set info_set = ''.join(sorted(cards[player], reverse=True)) + history node = self.game_tree[info_set] # use regret matching to update strategy strategy = node.update_strategy(p2_prob if player == 0 else p1_prob) # for each action, recursively call cfr with additional history and corresponding player probabilities action_utils = np.zeros(len(ACTIONS)) for i, action in enumerate(ACTIONS): next_history = history + action res = self.cfr(cards, next_history, p1_prob * strategy[i], p2_prob) if player == 0 else \ self.cfr(cards, next_history, p1_prob, p2_prob * strategy[i]) action_utils[i] = -res # for each action, compute and accumulate counterfactual regret # weighted by the probability that the opponent plays to the current information set, node_util = sum(action_utils * strategy) regret = action_utils - node_util node.regret_sum += regret * (p2_prob if player == 0 else p1_prob) return node_util def start(self, games): """run cfr for a set number of games and print the outcome of the game tree afterwards""" for g in range(games): cards = self.new_game() self.cfr(cards, '', 1, 1) print(self.game_tree) def same_game(self, games): """run CFR over many games with the same deal of cards to see how the nash equilibrium converges for that set of hands""" cards = self.new_game() for g in range(games): self.cfr(cards, '', 1, 1) print(self.game_tree) if __name__ == '__main__': PokerGame().start(1000)

### patuuh commented Jul 26, 2019

 For some reason I get gametree node values for all rounds [0.33333, 0.333333, 0.33333]. Do you know what might the the problem