Python implementation of the algorithm for correlated equilibria described in Hart and Mas-Colell (2000)
# Written by Daniel Filan (2017) based on the algorithm described in Hart and Mas-Colell (2000)
# This code is in the public domain.
# This script implements the Hart-Mas-Colell algorithm for finding correlated
# equilibria in many-player many-action games via the players following a simple
# adaptive procedure.
import numpy as np
import random
# players are 0, 1, 2, ..., n-1
# Game 'matrix': each player i chooses index i and gets payoff equal to element
# i of the resulting list
# so, payoff to player i is
# matrix[player_0_action][player_1_action]...[player_n-1_action][i]
# For the code to work, all entries of the matrix must be non-negative numbers.
prisoners_dilemma = np.array([[[2,2], [0,4]],
                              [[4,0], [1,1]]])
chicken = np.array([[[6,6], [2,7]],
                    [[7,2], [0,0]]])
matching_pennies = np.array([[[0,1], [1,0]],
                             [[1,0], [0,1]]])
battle_of_sexes = np.array([[[2,1], [0,0]],
                            [[0,0], [1,2]]])
# this one is based on example 2.8 of the 1974 Aumann paper
expanded_chicken = np.array([[[6,6], [0,0], [2,7]],
                             [[0,0], [4,4], [3,0]],
                             [[7,2], [0,3], [0,0]]])
# this one is based on example 2.5 of the 1974 Aumann paper
confusing = np.array([[[[0,0,3], [2,2,2], [0,0,0]],
                       [[0,0,0], [0,0,0], [0,0,0]]],
                      [[[1,0,0], [0,0,0], [0,1,0]],
                       [[0,0,0], [2,2,2], [0,0,3]]]])
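# For illustration (not used by the procedure itself): in prisoners_dilemma,
# if player 0 plays action 1 and player 1 plays action 0, then
# prisoners_dilemma[1][0] == [4, 0], so player 0 receives payoff 4 and
# player 1 receives payoff 0.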
## TODO: add example with a nice correlated equilibrium but with no
## deterministic Nash equilibria
game_matrix = matching_pennies
num_players = np.ndim(game_matrix) - 1
largest_payoff = np.amax(np.ndarray.flatten(game_matrix))
def num_actions(player):
    return np.shape(game_matrix)[player]
assert np.shape(game_matrix)[num_players] == num_players, "length of payoff array doesn't match number of players"
for i in np.ndarray.flatten(game_matrix):
    assert i >= 0, "not all payoffs are non-negative in your game matrix"
def replace_at_index(tup, index, val):
    return tup[:index] + (val,) + tup[index + 1:]
def regret(player, old_action, alt_action, payoff_diffs, hist_length):
    # payoff_diffs[player][old_action][alt_action] is an array of the difference
    # in utilities player would have gotten in the past by playing alt_action
    # wherever they actually played old_action
    return max(0, sum(payoff_diffs[player][old_action][alt_action])
               / hist_length)
def update_payoff_diffs(payoff_diffs, action_tuple):
    # payoff_diffs[player][old_action][alt_action] is an array of the difference
    # in utilities player would have gotten in the past by playing alt_action
    # wherever they actually played old_action
    for player in range(num_players):
        assert isinstance(action_tuple[player], int), "action tuple isn't made of integers in update_payoff_diffs"
        old_action = action_tuple[player]
        assert old_action in range(num_actions(player)), "action tuple contains actions that aren't in range for their players in update_payoff_diffs"
        old_payoff = game_matrix[action_tuple][player]
        for alt_action in range(num_actions(player)):
            alt_action_tuple = replace_at_index(action_tuple, player,
                                                alt_action)
            alt_payoff = game_matrix[alt_action_tuple][player]
            payoff_diffs[player][old_action][alt_action].append(alt_payoff
                                                                - old_payoff)
# this function computes the distribution over new actions given a previous
# action and an array of regrets for playing the previous action instead of
# other actions. this array has one entry for every action, and the entry for
# the previous action must be zero.
def prob_next_action(player, last_action, regret_array):
    assert regret_array[last_action] == 0, "regret for the last action should be zero"
    normalisation_const = (num_actions(player) - 1) * largest_payoff
    probs_array = [r / normalisation_const for r in regret_array]
    prob_last_action = 1 - sum(probs_array)
    probs_array[last_action] = prob_last_action
    return probs_array
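# Sanity check on the scaling (purely illustrative): in matching_pennies,
# largest_payoff is 1 and each player has 2 actions, so normalisation_const is
# 1; a regret of 0.25 for switching then means the player switches with
# probability 0.25 and repeats their last action with probability 0.75.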
# this function computes a history of playing the game up to some time.
def run_procedure(num_steps):
    initial_actions = []
    for player in range(num_players):
        initial_actions.append(random.randrange(num_actions(player)))
    initial_action_tuple = tuple(initial_actions)
    game_history = [initial_action_tuple]
    # initialise payoff_diffs:
    # payoff_diffs[player][old_action][alt_action] is an array of the difference
    # in utilities player would have gotten in the past by playing alt_action
    # wherever they actually played old_action
    payoff_diffs = [[] for player in range(num_players)]
    for player in range(num_players):
        payoff_diffs[player] = [[] for old_action in range(num_actions(player))]
        for old_action in range(num_actions(player)):
            payoff_diffs[player][old_action] = [[] for alt_action
                                                in range(num_actions(player))]
    for t in range(num_steps):
        next_actions = []
        hist_length = t + 1
        update_payoff_diffs(payoff_diffs, game_history[-1])
        for player in range(num_players):
            actions = range(num_actions(player))
            old_action = game_history[-1][player]
            regret_array = [regret(player, old_action, alt_action, payoff_diffs,
                                   hist_length)
                            for alt_action in actions]
            prob_array = prob_next_action(player, game_history[-1][player],
                                          regret_array)
            # cast to a plain int so the tuples stored in the history satisfy
            # the integer checks in update_payoff_diffs
            next_actions.append(int(np.random.choice(num_actions(player),
                                                     p=prob_array)))
        game_history.append(tuple(next_actions))
    return game_history
print(run_procedure(100))
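# Possible follow-up, sketched as an addition rather than part of the original
# script: Hart and Mas-Colell (2000) show that the empirical distribution of
# joint actions converges to the set of correlated equilibria, so it is natural
# to tally how often each action tuple occurs in the history. The helper name
# empirical_distribution is illustrative, not from the source.
def empirical_distribution(history):
    # counts[a_0][a_1]...[a_{n-1}] counts how often that action tuple was played
    counts = np.zeros([num_actions(player) for player in range(num_players)])
    for action_tuple in history:
        counts[action_tuple] += 1
    return counts / len(history)

# For example, with game_matrix = matching_pennies a long run should give a
# joint distribution near uniform (roughly 0.25 on each of the four cells),
# the game's unique correlated equilibrium:
# print(empirical_distribution(run_procedure(10000)))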