Skip to content

Instantly share code, notes, and snippets.

@paulhankin
Created December 30, 2016 23:25
Show Gist options
  • Select an option

  • Save paulhankin/fc6a9a605998b831d278b6d41449fc47 to your computer and use it in GitHub Desktop.

Select an option

Save paulhankin/fc6a9a605998b831d278b6d41449fc47 to your computer and use it in GitHub Desktop.
Compute optimal strategy for simple poker game
# Player A and player B are each dealt a number (their hand) from 0 to n-1.
# Player A can:
#   check: showdown; A's payoff is -1, 0 or +1 (lose, tie, win)
#   raise: if B folds, A's payoff is +1
#          if B calls, showdown; A's payoff is -2, 0 or +2
# strat[i] is the probability that A raises with hand i.
# Then counterB(strat) gives B's optimal counter-strategy.
def counterB(strat):
    """Return B's best-response calling strategy to A's raising strategy.

    Args:
        strat: sequence where strat[i] is the probability that A raises
            when holding hand i (hands are ranked 0 .. len(strat)-1).

    Returns:
        A list of the same length; entry i is 1.0 if B should call a raise
        when holding hand i and 0.0 if B should fold.

    If A never raises (all entries zero) the conditional distribution of A's
    hand given a raise is undefined and every response by B is equally good;
    a uniform belief over A's hands is used in that case instead of dividing
    by zero.  An empty strategy yields an empty list.
    """
    n = len(strat)
    if n == 0:
        return []

    # float() keeps the divisions below exact-as-float even for integer
    # inputs under Python 2, where int / int truncates.
    total = float(sum(strat))
    if total == 0:
        given_raise = [1.0 / n] * n
    else:
        given_raise = [s / total for s in strat]

    result = []
    below = 0.0  # P(A holds a hand lower than B's | A raised)
    for tie in given_raise:
        win = below
        lose = 1.0 - win - tie
        # Calling wins or loses 2 at showdown; folding always costs 1.
        result.append(1.0 if 2 * win - 2 * lose >= -1 else 0.0)
        below += tie
    return result
# counterA returns player A's optimal strategy given B's
# calling strategy, together with A's expected value summed over A's hands.
def counterA(strat):
    """Return A's best-response raising strategy to B's calling strategy.

    Args:
        strat: sequence where strat[k] is the probability that B calls a
            raise when holding hand k (hands are ranked 0 .. len(strat)-1,
            each equally likely).

    Returns:
        A tuple (result, ev).  result[i] is 1.0 if A should raise with hand
        i and 0.0 if A should check (ties go to raising).  ev is the sum over
        A's hands of the better of the two options' expected values; it is
        NOT divided by the number of hands.
    """
    n = len(strat)
    if n == 0:
        return [], 0.0

    # A float denominator keeps every division a true division, including
    # under Python 2 where int / int would truncate to 0.
    size = float(n)
    p_tie = 1 / size
    # Probability that B folds to a raise; independent of A's hand.
    p_fold = 1 - sum(strat) / size

    result = []
    ev = 0.0
    for i in range(n):
        p_win = i / size
        # Checking goes straight to a showdown for a stake of 1.
        ev_check = 1 * p_win - 1 * (1 - p_win - p_tie)
        # B calls holding a lower hand (A wins 2) or a higher hand (A loses 2).
        p_call_win = sum(strat[:i]) / size
        p_call_lose = sum(strat[i + 1:]) / size
        ev_raise = 2 * p_call_win - 2 * p_call_lose + p_fold
        result.append(1.0 if ev_raise >= ev_check else 0.0)
        ev += max(ev_check, ev_raise)
    return result, ev
def blend(ss, ts, f):
    """Return the element-wise mix of two strategies.

    Each output entry is ss[i] * (1 - f) + ts[i] * f, so f == 0 reproduces
    ss and f == 1 reproduces ts.  The result has len(ss) entries; ts must be
    at least as long as ss.
    """
    # ts is indexed rather than zipped so that a too-short ts still raises
    # IndexError instead of silently truncating the result.
    return [s * (1 - f) + ts[i] * f for i, s in enumerate(ss)]
def adjust_up(cB):
    """Concentrate B's total calling probability on the highest hands.

    The sum t of cB is preserved (when 0 <= t <= len(cB)) but reassigned
    from the top hand downwards: the highest hand gets min(1, t), the next
    one gets whatever remains up to 1, and so on, with every entry clamped
    to the range [0, 1].
    """
    t = sum(cB)
    n = len(cB)
    # n - i - 1 is the number of hands ranked above hand i; those hands
    # absorb up to that much probability mass before hand i receives any.
    return [min(1, max(0, t - (n - i - 1))) for i in range(n)]
# Approximate the equilibrium of the 6-hand game by repeatedly computing each
# player's best response to the other's current strategy and averaging it in
# with weight 1/i (so each strategy is the running mean of its best responses).
cA = [0, 0, 0, 0, 0, 1]
cB = [0, 0, 0, 0, 0, 1]
for i in range(1, 1000000):
    cAnew, ev = counterA(cB)
    cBnew = counterB(cA)
    cA = blend(cA, cAnew, 1.0 / i)
    cB = blend(cB, cBnew, 1.0 / i)
    # Keep B's calling range contiguous from the top hand downwards.
    cB = adjust_up(cB)
# NOTE(review): the original indentation was lost; the print is assumed to run
# once after the loop rather than on every iteration -- confirm.
# ev is the value returned by the final counterA call (summed over A's hands).
# %-formatting prints the same text under both Python 2 and Python 3.
print("%s %s %s" % (ev, cA, cB))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment