Skip to content

Instantly share code, notes, and snippets.

@NikolasTzimoulis
Last active December 13, 2015 19:48
Show Gist options
  • Save NikolasTzimoulis/4965057 to your computer and use it in GitHub Desktop.
Save NikolasTzimoulis/4965057 to your computer and use it in GitHub Desktop.
Probabilistic forward-backward induction for solving centipede-style games.
leavePayoffs = ( (0, 0), (-1, 3), (2, 2), (1, 5), (4, 4), (3, 7), (6, 6), (5, 9), (8, 8), (7, 11), (10, 10) )
#leavePayoffs = ( (0, 0), (-1, 1), (2, -2), (-3, 3), (4, -4), (-5, 5), (6, -6), (-7, 7), (8, -8), (-9, 9), (10, -10) )
backgroundPriors = [(0.99999, 1.0)]
lambdas = [(sum([lam[0] for lam in backgroundPriors])/len(backgroundPriors) , sum([lam[1] for lam in backgroundPriors])/len(backgroundPriors))]*len(leavePayoffs)
expectedPayoffs = [(0.0, 0.0)]*len(leavePayoffs) # initial values are overwritten, don't fret over them
lambdasGivenStay = [(0.0, 1.0)]*(len(leavePayoffs)-1) # initial values are overwritten, don't fret over them
getPlayer = lambda node: node%2 # returns 0 for player 1's decision nodes and 1 for player 2's nodes
other = lambda player: (player+1)%2 # returns the other player
lambdaThreshold = lambda a1, a2, b1, b2: (float(a2) - b2) / (-a1 + a2 + b1 - b2)
maxRounds = 10
for round in range(maxRounds):
print 'new lambda intervals:', lambdas
# backwards induction to update expected payoffs
for dnode in reversed(range(len(leavePayoffs))):
if dnode == len(leavePayoffs) - 1: # if this is the last node
expectedPayoffs[dnode] = leavePayoffs[dnode] # then there is no choice
else:
# calculate threshold value of lambda that changes the decision
u_a1 = leavePayoffs[dnode][getPlayer(dnode)]
u_a2 = leavePayoffs[dnode][other(getPlayer(dnode))]
u_b1 = expectedPayoffs[dnode+1][getPlayer(dnode)]
u_b2 = expectedPayoffs[dnode+1][other(getPlayer(dnode))]
try:
thres = max(0.0, min(1.0, lambdaThreshold(u_a1, u_a2, u_b1, u_b2)))
# see which decision goes with the interval above the threshold
# and which decision goes with the interval below the threshold
if u_b2 - u_b1 > u_a2 - u_a1: # lambda > threshold => prefer (a1, a2)
u_above_threshold = leavePayoffs[dnode]
u_below_threshold = expectedPayoffs[dnode+1]
# apply stay constraint to lambda interval (to use for lambda update)
lambdasGivenStay[dnode] = ( lambdas[dnode][0]/2 , (thres+lambdas[dnode][1])/2 )
else:
u_above_threshold = expectedPayoffs[dnode+1]
u_below_threshold = leavePayoffs[dnode]
# apply stay constraint to lambda interval (to use for lambda update)
lambdasGivenStay[dnode] = ( (thres+lambdas[dnode][0])/2, (1.0+lambdas[dnode][1])/2 )
except ZeroDivisionError:
thres = 0.0
if u_b2 < u_a2:
u_above_threshold = leavePayoffs[dnode]
u_below_threshold = expectedPayoffs[dnode+1]
# apply stay constraint to lambda interval (to use for lambda update)
lambdasGivenStay[dnode] = ( max(0.0,lambdas[dnode][0]) , min(thres,lambdas[dnode][1]))
else:
u_above_threshold = expectedPayoffs[dnode+1]
u_below_threshold = leavePayoffs[dnode]
# apply stay constraint to lambda interval (to use for lambda update)
lambdasGivenStay[dnode] = ( max(thres,lambdas[dnode][0]), min(1.0,lambdas[dnode][1]) )
# calculate normalising constant so that
# the probability mass of lambda is 1.0
norm_constant = 1.0 / ( lambdas[dnode][1] - lambdas[dnode][0] )
# get the size of the intervals (lambda_min, thres) and (thres, lambda_max)
interval_above = max(0.0, lambdas[dnode][1] - max(thres, lambdas[dnode][0]))
interval_below = max(0.0, min(thres, lambdas[dnode][1]) - lambdas[dnode][0])
# mix decisions according to intervals to get expected payoffs
node_payoffs = []
for i in range(2):
node_payoffs.append(norm_constant * (interval_above * u_above_threshold[i] + interval_below * u_below_threshold[i]))
expectedPayoffs[dnode] = (node_payoffs[0], node_payoffs[1])
print 'new expected payoffs:', expectedPayoffs
print
# forward induction to update lambdas
for dnode in range(len(leavePayoffs)):
nodePriors = backgroundPriors + lambdasGivenStay[0:dnode]
lambdas[dnode] = (sum([lam[0] for lam in nodePriors])/len(nodePriors) , sum([lam[1] for lam in nodePriors])/len(nodePriors))
import random
leavePayoffs = ( (0, 0), (-1, 3), (2, 2), (1, 5), (4, 4), (3, 7), (6, 6), (5, 9), (8, 8), (7, 11), (10, 10) )
#leavePayoffs = ( (0, 0), (-1, 1), (2, -2), (-3, 3), (4, -4), (-5, 5), (6, -6), (-7, 7), (8, -8), (-9, 9), (10, -10) )
backgroundPriors = [(0.99999, 1.0)]
lambdas = [(sum([lam[0] for lam in backgroundPriors])/len(backgroundPriors) , sum([lam[1] for lam in backgroundPriors])/len(backgroundPriors))]*len(leavePayoffs)
expectedPayoffs = [(0.0, 0.0)]*len(leavePayoffs) # initial values are overwritten, don't fret over them
lambdasGivenStay = [(0.0, 1.0)]*(len(leavePayoffs)-1) # initial values are overwritten, don't fret over them
getPlayer = lambda node: node%2 # returns 0 for player 1's decision nodes and 1 for player 2's nodes
other = lambda player: (player+1)%2 # returns the other player
lambdaThreshold = lambda a1, a2, b1, b2: (float(a2) - b2) / (-a1 + a2 + b1 - b2)
maxRounds = 1000
resultCounter = {}
for round in range(maxRounds):
print 'new lambda intervals:', lambdas
# backwards induction to update expected payoffs
for dnode in reversed(range(len(leavePayoffs))):
if dnode == len(leavePayoffs) - 1: # if this is the last node
expectedPayoffs[dnode] = leavePayoffs[dnode] # then there is no choice
else:
# calculate threshold value of lambda that changes the decision
u_a1 = leavePayoffs[dnode][getPlayer(dnode)]
u_a2 = leavePayoffs[dnode][other(getPlayer(dnode))]
u_b1 = expectedPayoffs[dnode+1][getPlayer(dnode)]
u_b2 = expectedPayoffs[dnode+1][other(getPlayer(dnode))]
try:
thres = max(0.0, min(1.0, lambdaThreshold(u_a1, u_a2, u_b1, u_b2)))
# see which decision goes with the interval above the threshold
# and which decision goes with the interval below the threshold
if u_b2 - u_b1 > u_a2 - u_a1: # lambda > threshold => prefer (a1, a2)
u_above_threshold = leavePayoffs[dnode]
u_below_threshold = expectedPayoffs[dnode+1]
# apply stay constraint to lambda interval (to use for lambda update)
lambdasGivenStay[dnode] = ( lambdas[dnode][0]/2 , (thres+lambdas[dnode][1])/2 )
else:
u_above_threshold = expectedPayoffs[dnode+1]
u_below_threshold = leavePayoffs[dnode]
# apply stay constraint to lambda interval (to use for lambda update)
lambdasGivenStay[dnode] = ( (thres+lambdas[dnode][0])/2, (1.0+lambdas[dnode][1])/2 )
except ZeroDivisionError:
thres = 0.0
if u_b2 < u_a2:
u_above_threshold = leavePayoffs[dnode]
u_below_threshold = expectedPayoffs[dnode+1]
# apply stay constraint to lambda interval (to use for lambda update)
lambdasGivenStay[dnode] = ( max(0.0,lambdas[dnode][0]) , min(thres,lambdas[dnode][1]))
else:
u_above_threshold = expectedPayoffs[dnode+1]
u_below_threshold = leavePayoffs[dnode]
# apply stay constraint to lambda interval (to use for lambda update)
lambdasGivenStay[dnode] = ( max(thres,lambdas[dnode][0]), min(1.0,lambdas[dnode][1]) )
# sample the lambda distribution
lambda_sample = random.uniform(lambdas[dnode][0], lambdas[dnode][1])
# assign node expected utility based on sample
if lambda_sample >= thres:
expectedPayoffs[dnode] = u_above_threshold
else:
expectedPayoffs[dnode] = u_below_threshold
if expectedPayoffs[0] in resultCounter:
resultCounter[expectedPayoffs[0]] += 1
else:
resultCounter[expectedPayoffs[0]] = 1
print 'new expected payoffs:', expectedPayoffs
print
# forward induction to update lambdas
for dnode in range(len(leavePayoffs)):
nodePriors = backgroundPriors + lambdasGivenStay[0:dnode]
lambdas[dnode] = (sum([lam[0] for lam in nodePriors])/len(nodePriors) , sum([lam[1] for lam in nodePriors])/len(nodePriors))
print resultCounter
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment