Skip to content

Instantly share code, notes, and snippets.

@thomasahle
Last active January 7, 2022 16:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thomasahle/711bc74f00ef15d30ef11f68b2ac8c70 to your computer and use it in GitHub Desktop.
Save thomasahle/711bc74f00ef15d30ef11f68b2ac8c70 to your computer and use it in GitHub Desktop.
def play(r1, r2, replay_buffer):
    """Play one game from the initial state and log every visited state.

    Each visited state contributes two tuples to ``replay_buffer``:
    ``(priv, state, res)`` for the player to move and
    ``(other_priv, state, -res)`` for the opponent. Because the tuples are
    appended after the recursive call returns, entries are added from the
    terminal state back towards the initial state.

    Relies on the module-level names ``game`` and ``args`` (``args.eps`` is
    the exploration epsilon handed to ``game.sample_action``).

    Args:
        r1: First player's private input, passed to ``game.make_priv(r1, 0)``
            and ``game.evaluate_call`` (presumably that player's roll --
            confirm against ``game``).
        r2: Second player's private input, passed to
            ``game.make_priv(r2, 1)`` and ``game.evaluate_call``.
        replay_buffer: Object with an ``append`` method that receives the
            ``(priv, state, result)`` tuples.

    Returns:
        None. The results are delivered through ``replay_buffer``.
    """
    privs = [game.make_priv(r1, 0), game.make_priv(r2, 1)]

    def play_inner(state):
        """Play on from ``state``; return the result for the player to move."""
        cur = game.get_cur(state)  # Current player id
        calls = game.get_calls(state)  # Bets made by player so far

        if calls and calls[-1] == game.LIE_ACTION:
            # -1 stands in for "no previous call" when lie was the only action
            prev_call = calls[-2] if len(calls) >= 2 else -1
            # If prev_call is good it means we won (because our opponent
            # called lie)
            res = 1 if game.evaluate_call(r1, r2, prev_call) else -1
        else:
            # Sample a random action based on values from the network
            # We add a +epsilon to the regrets to promote exploration
            action = game.sample_action(privs[cur], state, args.eps)
            new_state = game.apply_action(state, action)
            # Just classic min/max stuff: the value for the player to move
            # is the negation of the value seen by the next player
            res = -play_inner(new_state)

        # Save the result from the perspective of both sides
        replay_buffer.append((privs[cur], state, res))
        replay_buffer.append((privs[1 - cur], state, -res))

        return res

    state = game.make_state()
    play_inner(state)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment