Skip to content

Instantly share code, notes, and snippets.

@shahradj
Last active Oct 2, 2019
Embed
What would you like to do?
Playing the game of Prisoner's Dilemma
class Game:
    """Repeated two-player game between two ``Player`` agents.

    The available moves are read from ``Prisoners.actions``; every ordered
    pair of moves is a possible joint outcome of a round.
    """

    def __init__(self, max_game=100):
        """Create the two players.

        Args:
            max_game: Number of rounds played by each call to ``play``.
        """
        self.p1 = Player('Agent A')
        self.p2 = Player('Agent B')
        self.max_game = max_game

    def play(self, avg_regret_matching=False):
        """Play ``max_game`` rounds and tally the joint actions.

        Args:
            avg_regret_matching: When false (the default), every round both
                players update their strategy, pick an action and then
                record regret against the opponent's action. When true,
                the players only pick actions via ``action(use_avg=True)``;
                no strategy update or regret bookkeeping is performed.

        Returns:
            A dict mapping each ``(p1_action, p2_action)`` pair to the
            number of rounds that ended with that pair. The same dict is
            also printed, as before.

        Raises:
            KeyError: If a player returns an action that is not listed in
                ``Prisoners.actions``.
        """
        # Pre-seed every possible joint outcome so unseen pairs show up as 0.
        outcome_counts = {
            pair: 0
            for pair in itertools.product(Prisoners.actions, Prisoners.actions)
        }

        if avg_regret_matching:
            self._play_avg_regret_matching(outcome_counts)
        else:
            self._play_regret_matching(outcome_counts)

        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print(outcome_counts)
        return outcome_counts

    def _play_regret_matching(self, outcome_counts):
        """Run the learning rounds, adding each joint action to the tally."""
        for _ in range(self.max_game):
            # Both strategies are refreshed before either player moves.
            self.p1.update_strategy()
            self.p2.update_strategy()
            a1 = self.p1.action()
            a2 = self.p2.action()
            # Each player records regret from its own point of view:
            # (own action, opponent action).
            self.p1.regret(a1, a2)
            self.p2.regret(a2, a1)
            outcome_counts[(a1, a2)] += 1

    def _play_avg_regret_matching(self, outcome_counts):
        """Run rounds using ``action(use_avg=True)`` only, with no learning."""
        for _ in range(self.max_game):
            a1 = self.p1.action(use_avg=True)
            a2 = self.p2.action(use_avg=True)
            outcome_counts[(a1, a2)] += 1

    def conclude(self):
        """
        let two players conclude the average strategy from the previous strategy stats
        """
        self.p1.learn_avg_strategy()
        self.p2.learn_avg_strategy()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment