@hrb90
Created July 26, 2017 18:49
Python implementation of the adaptive procedure for finding correlated equilibria described in Hart and Mas-Colell (2000)
# Written by Daniel Filan (2017) based on the algorithm described in Hart and Mas-Colell (2000)
# This code is in the public domain.
# This script implements the Hart-Mas-Colell algorithm for finding correlated
# equilibria in many-player many-action games via the players following a simple
# adaptive procedure.
import numpy as np
import random
# players are 0, 1, 2, ..., n-1
# Game 'matrix': each player i chooses index i and gets payoff equal to element
# i of the resulting list
# so, payoff to player i is
# matrix[player_0_action][player_1_action]...[player_n-1_action][i]
# For the code to work, all entries of the matrix must be non-negative numbers.
prisoners_dilemma = np.array([[[2,2], [0,4]],
                              [[4,0], [1,1]]])
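# For example, prisoners_dilemma[0][1] is [0, 4]: if player 0 plays action 0
# and player 1 plays action 1, player 0 gets payoff 0 and player 1 gets 4.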
chicken = np.array([[[6,6], [2,7]],
                    [[7,2], [0,0]]])
matching_pennies = np.array([[[0,1], [1,0]],
                             [[1,0], [0,1]]])
battle_of_sexes = np.array([[[2,1], [0,0]],
                            [[0,0], [1,2]]])
# this one is based off of example 2.8 of the 1973 Aumann paper
expanded_chicken = np.array([[[6,6], [0,0], [2,7]],
                             [[0,0], [4,4], [3,0]],
                             [[7,2], [0,3], [0,0]]])
# this one is based off of example 2.5 of the 1973 Aumann paper
confusing = np.array([[[[0,0,3], [2,2,2], [0,0,0]],
                       [[0,0,0], [0,0,0], [0,0,0]]],
                      [[[1,0,0], [0,0,0], [0,1,0]],
                       [[0,0,0], [2,2,2], [0,0,3]]]])
## TODO: add example with a nice correlated equilibrium but with no
## deterministic nash equilibria
game_matrix = matching_pennies
num_players = np.ndim(game_matrix) - 1
largest_payoff = np.amax(game_matrix)
def num_actions(player):
    return np.shape(game_matrix)[player]
assert np.shape(game_matrix)[num_players] == num_players, "length of payoff array doesn't match number of players"
for i in game_matrix.flatten():
    assert i >= 0, "not all payoffs are non-negative in your game matrix"
def replace_at_index(tup, index, val):
    return tup[:index] + (val,) + tup[index + 1:]
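# e.g. replace_at_index((0, 1), 0, 2) == (2, 1); used below to substitute a
# single player's action into a joint action tuple.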
def regret(player, old_action, alt_action, payoff_diffs, hist_length):
    # payoff_diffs[player][old_action][alt_action] is a list of the differences
    # in utility player would have gotten in the past by playing alt_action
    # wherever they actually played old_action
    return max(0, sum(payoff_diffs[player][old_action][alt_action])
                  / hist_length)
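# This is the average regret of Hart and Mas-Colell: the total payoff player
# would have gained by playing alt_action at every past period in which they
# played old_action, divided by the length of the history, floored at zero.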
def update_payoff_diffs(payoff_diffs, action_tuple):
    # payoff_diffs[player][old_action][alt_action] is a list of the differences
    # in utility player would have gotten in the past by playing alt_action
    # wherever they actually played old_action
    for player in range(num_players):
        assert isinstance(action_tuple[player], int), "action tuple passed to update_payoff_diffs contains non-integer actions"
        old_action = action_tuple[player]
        assert old_action in range(num_actions(player)), "action tuple passed to update_payoff_diffs contains an out-of-range action for its player"
        old_payoff = game_matrix[action_tuple][player]
        for alt_action in range(num_actions(player)):
            alt_action_tuple = replace_at_index(action_tuple, player,
                                                alt_action)
            alt_payoff = game_matrix[alt_action_tuple][player]
            payoff_diffs[player][old_action][alt_action].append(alt_payoff
                                                                - old_payoff)
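# For example, if game_matrix were prisoners_dilemma and the last joint action
# was (0, 0), then for player 0 the alternative action 1 would have paid 4
# instead of 2, so payoff_diffs[0][0][1] gets 2 appended (and
# payoff_diffs[0][0][0] gets 0).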
# this function computes the distribution over new actions given a previous
# action and an array of regrets for playing the previous action instead of
# other actions. this array has one entry for every action, and the entry for
# the previous action must be zero.
def prob_next_action(player, last_action, regret_array):
    assert regret_array[last_action] == 0, "regret for repeating the action just played should be zero"
    normalisation_const = (num_actions(player) - 1) * largest_payoff
    probs_array = [regret / normalisation_const for regret in regret_array]
    prob_last_action = 1 - sum(probs_array)
    probs_array[last_action] = prob_last_action
    return probs_array
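# For example, with 2 actions, largest_payoff = 4, last_action = 0 and
# regret_array = [0, 1], the normalisation constant is (2 - 1) * 4 = 4, giving
# probabilities [0.75, 0.25]: switch to an alternative action with probability
# proportional to its regret, and otherwise repeat the last action.
# (Illustrative numbers only, not tied to any particular game above.)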
# this function computes a history of playing the game up to some time.
def run_procedure(num_steps):
    initial_actions = []
    for player in range(num_players):
        initial_actions.append(random.randrange(num_actions(player)))
    initial_action_tuple = tuple(initial_actions)
    game_history = [initial_action_tuple]
    # initialise payoff_diffs
    # payoff_diffs[player][old_action][alt_action] is a list of the differences
    # in utility player would have gotten in the past by playing alt_action
    # wherever they actually played old_action
    payoff_diffs = [[] for player in range(num_players)]
    for player in range(num_players):
        payoff_diffs[player] = [[] for old_action in range(num_actions(player))]
        for old_action in range(num_actions(player)):
            payoff_diffs[player][old_action] = [[] for alt_action
                                                in range(num_actions(player))]
    for t in range(num_steps):
        next_actions = []
        hist_length = t + 1
        update_payoff_diffs(payoff_diffs, game_history[-1])
        for player in range(num_players):
            actions = range(num_actions(player))
            old_action = game_history[-1][player]
            regret_array = [regret(player, old_action, alt_action, payoff_diffs,
                                   hist_length)
                            for alt_action in actions]
            prob_array = prob_next_action(player, game_history[-1][player],
                                          regret_array)
            # cast to a plain int so the isinstance assertion in
            # update_payoff_diffs accepts the action on the next step
            next_actions.append(int(np.random.choice(num_actions(player),
                                                      p=prob_array)))
        game_history.append(tuple(next_actions))
    return game_history
print(run_procedure(100))
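# Hart and Mas-Colell show that the empirical distribution of joint actions
# converges (almost surely) to the set of correlated equilibria. The helper
# below is a sketch of how one might summarise a run that way; the function
# name and the 1000-step example are additions, not part of the original gist.
def empirical_distribution(game_history):
    # fraction of periods in which each joint action tuple was played
    counts = {}
    for action_tuple in game_history:
        counts[action_tuple] = counts.get(action_tuple, 0) + 1
    return {action_tuple: count / len(game_history)
            for action_tuple, count in counts.items()}

# example usage:
# print(empirical_distribution(run_procedure(1000)))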