Python implementation of algorithm for correlated equilibria described in Hart and Mas-Colell (2000)
# Written by Daniel Filan (2017) based on the algorithm described in Hart and Mas-Colell (2000)
# This code is in the public domain.
# This script implements the Hart-Mas-Colell algorithm for finding correlated
# equilibria in many-player many-action games via the players following a simple
# adaptive procedure.
import numpy as np
import random
# players are 0, 1, 2, ..., n-1
# Game 'matrix': each player i chooses index i and gets payoff equal to element
# i of the resulting list
# so, payoff to player i is
# matrix[player_0_action][player_1_action]...[player_n-1_action][i]
# For the code to work, all entries of the matrix must be non-negative numbers.
prisoners_dilemma = np.array([[[2,2], [0,4]],
                              [[4,0], [1,1]]])
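# (e.g. prisoners_dilemma[0][1] is [0,4]: if player 0 plays action 0 and
# player 1 plays action 1, then player 0 gets payoff 0 and player 1 gets 4)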
chicken = np.array([[[6,6], [2,7]],
                    [[7,2], [0,0]]])
matching_pennies = np.array([[[0,1], [1,0]],
                             [[1,0], [0,1]]])
battle_of_sexes = np.array([[[2,1], [0,0]],
                            [[0,0], [1,2]]])
# this one is based off of example 2.8 of the 1973 Aumann paper
expanded_chicken = np.array([[[6,6], [0,0], [2,7]],
                             [[0,0], [4,4], [3,0]],
                             [[7,2], [0,3], [0,0]]])
# this one is based off of example 2.5 of the 1973 Aumann paper
confusing = np.array([[[[0,0,3], [2,2,2], [0,0,0]],
                       [[0,0,0], [0,0,0], [0,0,0]]],
                      [[[1,0,0], [0,0,0], [0,1,0]],
                       [[0,0,0], [2,2,2], [0,0,3]]]])
## TODO: add example with a nice correlated equilibrium but with no
## deterministic Nash equilibria
game_matrix = matching_pennies
num_players = np.ndim(game_matrix) - 1
largest_payoff = np.amax(np.ndarray.flatten(game_matrix))
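# (largest_payoff is used below as the inertia constant in prob_next_action:
# since payoffs are non-negative, each regret is at most largest_payoff, so
# dividing by (num_actions(player) - 1) * largest_payoff keeps the switching
# probabilities non-negative and summing to at most 1)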
def num_actions(player):
    return np.shape(game_matrix)[player]

assert np.shape(game_matrix)[num_players] == num_players, "length of payoff array doesn't match number of players"
for i in np.ndarray.flatten(game_matrix):
    assert i >= 0, "not all payoffs are non-negative in your game matrix"
def replace_at_index(tup, index, val):
    return tup[:index] + (val,) + tup[index + 1:]
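# (e.g. replace_at_index((0, 1, 2), 1, 5) returns (0, 5, 2); this is used below
# to swap a single player's action out of a joint action tuple)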
def regret(player, old_action, alt_action, payoff_diffs, hist_length):
    # payoff_diffs[player][old_action][alt_action] is a list of the differences
    # in utility that player would have gotten in the past by playing
    # alt_action wherever they actually played old_action
    return max(0, sum(payoff_diffs[player][old_action][alt_action])
               / hist_length)
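# (Up to notation, this is the average regret of Hart and Mas-Colell (2000):
# R_t(j,k) = max{0, (1/t) * sum, over past periods in which the player chose j,
# of [payoff they would have got from k] - [payoff they got from j]}.)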
def update_payoff_diffs(payoff_diffs, action_tuple):
    # payoff_diffs[player][old_action][alt_action] is a list of the differences
    # in utility that player would have gotten in the past by playing
    # alt_action wherever they actually played old_action
    for player in range(num_players):
        assert isinstance(action_tuple[player], int), "action tuple isn't made of integers in update_payoff_diffs"
        old_action = action_tuple[player]
        assert old_action in range(num_actions(player)), "action tuple contains actions that aren't in range for their players in update_payoff_diffs"
        old_payoff = game_matrix[action_tuple][player]
        for alt_action in range(num_actions(player)):
            alt_action_tuple = replace_at_index(action_tuple, player,
                                                alt_action)
            alt_payoff = game_matrix[alt_action_tuple][player]
            payoff_diffs[player][old_action][alt_action].append(alt_payoff
                                                                - old_payoff)
# this function computes the distribution over new actions given a previous
# action and an array of regrets for playing the previous action instead of
# other actions. this array has one entry for every action, and the entry for
# the previous action must be zero.
def prob_next_action(player, last_action, regret_array):
    assert regret_array[last_action] == 0, "your regret isn't zero when it should obviously be"
    normalisation_const = (num_actions(player) - 1) * largest_payoff
    probs_array = [regret / normalisation_const for regret in regret_array]
    prob_last_action = 1 - sum(probs_array)
    probs_array[last_action] = prob_last_action
    return probs_array
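# (Worked example, with matching_pennies, so largest_payoff = 1 and two actions
# per player: if last_action is 0 and regret_array is [0, 0.3], then the
# normalisation constant is (2 - 1) * 1 = 1 and the returned distribution is
# [0.7, 0.3], i.e. stay with probability 0.7 and switch with probability 0.3.)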
# this function computes a history of playing the game up to some time.
def run_procedure(num_steps):
    initial_actions = []
    for player in range(num_players):
        initial_actions.append(random.randrange(num_actions(player)))
    initial_action_tuple = tuple(initial_actions)
    game_history = [initial_action_tuple]
    # initialise payoff_diffs:
    # payoff_diffs[player][old_action][alt_action] is a list of the differences
    # in utility that player would have gotten in the past by playing
    # alt_action wherever they actually played old_action
    payoff_diffs = [[] for player in range(num_players)]
    for player in range(num_players):
        payoff_diffs[player] = [[] for old_action in range(num_actions(player))]
        for old_action in range(num_actions(player)):
            payoff_diffs[player][old_action] = [[] for alt_action
                                                in range(num_actions(player))]
    for t in range(num_steps):
        next_actions = []
        hist_length = t + 1
        update_payoff_diffs(payoff_diffs, game_history[-1])
        for player in range(num_players):
            actions = range(num_actions(player))
            old_action = game_history[-1][player]
            regret_array = [regret(player, old_action, alt_action, payoff_diffs,
                                   hist_length)
                            for alt_action in actions]
            prob_array = prob_next_action(player, game_history[-1][player],
                                          regret_array)
            # cast to a plain Python int so the isinstance check in
            # update_payoff_diffs accepts the sampled numpy integer
            next_actions.append(int(np.random.choice(num_actions(player),
                                                      p=prob_array)))
        game_history.append(tuple(next_actions))
    return game_history
print(run_procedure(100))
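# The Hart-Mas-Colell result is about the *empirical distribution* of joint
# play: as the number of steps grows, it converges (almost surely) to the set
# of correlated equilibria. The helper below is a small illustrative sketch,
# not part of the original procedure; it just tallies how often each joint
# action tuple occurs in a history returned by run_procedure.
def empirical_distribution(game_history):
    counts = {}
    for action_tuple in game_history:
        counts[action_tuple] = counts.get(action_tuple, 0) + 1
    total = float(len(game_history))
    return {action_tuple: count / total
            for action_tuple, count in counts.items()}

# Example usage: approximate correlated-equilibrium frequencies from a longer
# run, e.g. print(empirical_distribution(run_procedure(10000)))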