Created
February 19, 2017 10:41
-
-
Save stockedge/d3d1c7a1b4be5907035ee2d3de4800a7 to your computer and use it in GitHub Desktop.
心理学的に最強のじゃんけんアルゴリズムをBayesian Optimizationで求める。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# [1] Wang, Zhijian, Bin Xu, and Hai-Jun Zhou. "Social cycling and conditional responses in the Rock-Paper-Scissors game." arXiv preprint arXiv:1404.5199 (2014).
from bayes_opt import BayesianOptimization | |
from enum import Enum | |
import random | |
class Hand(Enum):
    """A rock-paper-scissors hand.

    The integer values place the hands on a cycle
    ``rock -> paper -> sissors -> rock``.  Moving one step forward on that
    cycle is called "counter-clockwise" here, one step backward "clockwise".

    The member name ``sissors`` is misspelled, but it is kept as-is because
    other code refers to it by that name.
    """

    rock = 0
    paper = 1
    sissors = 2

    @classmethod
    def get_first_hand(cls):
        """Return a uniformly random hand for the opening round."""
        r = random.random()
        # The branches partition [0, 1) with no gaps: a draw that lands
        # exactly on a threshold belongs to the upper interval.
        if r < 1 / 3:
            return cls.rock
        elif r < 2 / 3:
            return cls.paper
        return cls.sissors

    @classmethod
    def get_hand(cls, a, rm=1 / 3, r0=1 / 3):
        """Pick the next hand relative to the previous hand ``a``.

        Args:
            a: The hand played in the previous round.
            rm: Probability of shifting to ``clockwise(a)``.
            r0: Probability of repeating ``a`` unchanged.

        Returns:
            ``clockwise(a)`` with probability ``rm``, ``a`` with probability
            ``r0``, and ``counter_clockwise(a)`` with the remaining
            probability.
        """
        r = random.random()
        if r < rm:
            return cls.clockwise(a)
        elif r < rm + r0:
            # Reaching here already implies r >= rm, so r == rm is "stay"
            # rather than silently falling through to the last branch.
            return a
        return cls.counter_clockwise(a)

    @classmethod
    def counter_clockwise(cls, hand):
        """Return the next hand on the cycle (rock -> paper -> sissors -> rock).

        Returns ``None`` when ``hand`` is not a ``Hand`` member.
        """
        return cls._rotate(hand, 1)

    @classmethod
    def clockwise(cls, hand):
        """Return the previous hand on the cycle (rock -> sissors -> paper -> rock).

        Returns ``None`` when ``hand`` is not a ``Hand`` member.
        """
        return cls._rotate(hand, -1)

    @classmethod
    def _rotate(cls, hand, step):
        """Shift ``hand`` by ``step`` positions around the value cycle."""
        if not isinstance(hand, cls):
            # Unknown inputs have always produced None; keep that contract.
            return None
        return cls((hand.value + step) % len(cls))
class Result(Enum):
    """Outcome of one round, seen from the first player's side."""

    win = 3
    tie = 4
    loss = 5

    @classmethod
    def judge(cls, a, b):
        """Return the result for hand ``a`` when it is played against ``b``.

        Equal hands tie.  ``a`` loses when ``b`` is the hand that beats it
        (paper over rock, sissors over paper, rock over sissors).  Every
        other pairing counts as a win for ``a``.
        """
        if a == b:
            return cls.tie

        # (hand played by a, hand played by b) pairings in which a is beaten.
        losing_matchups = (
            (Hand.rock, Hand.paper),
            (Hand.paper, Hand.sissors),
            (Hand.sissors, Hand.rock),
        )
        for mine, theirs in losing_matchups:
            if a == mine and b == theirs:
                return cls.loss
        return cls.win
class Player():
    """A player whose next hand depends on the outcome of the previous round.

    After a win, tie or loss the player draws the next hand with
    ``Hand.get_hand`` using the pair of probabilities configured for that
    outcome: ``*m`` is passed as ``rm`` and ``*0`` as ``r0``.
    """

    # Class-level defaults: no round has been played yet.
    last_hand = None
    last_result = None

    def __init__(self, wm, w0, tm, t0, lm, l0):
        """Store the conditional-response probabilities.

        Args:
            wm, w0: ``rm`` / ``r0`` used after a win.
            tm, t0: ``rm`` / ``r0`` used after a tie.
            lm, l0: ``rm`` / ``r0`` used after a loss.
        """
        self.wm = wm
        self.w0 = w0
        self.tm = tm
        self.t0 = t0
        self.lm = lm
        self.l0 = l0

    def next_hand(self):
        """Choose, remember and return the hand for the next round.

        Raises:
            ValueError: If ``last_result`` is neither ``None`` nor a
                ``Result`` member.
        """
        if self.last_result is None:
            # First round: nothing to condition on.
            my_hand = Hand.get_first_hand()
        elif self.last_result == Result.win:
            my_hand = Hand.get_hand(self.last_hand, self.wm, self.w0)
        elif self.last_result == Result.tie:
            my_hand = Hand.get_hand(self.last_hand, self.tm, self.t0)
        elif self.last_result == Result.loss:
            my_hand = Hand.get_hand(self.last_hand, self.lm, self.l0)
        else:
            # Fail with a clear message instead of an UnboundLocalError.
            raise ValueError(
                "unexpected last_result: {!r}".format(self.last_result))
        self.last_hand = my_hand
        return my_hand

    def judge(self, enemy_hand):
        """Record the outcome of this player's last hand against ``enemy_hand``."""
        self.last_result = Result.judge(self.last_hand, enemy_hand)
class RPSGame():
    """A repeated rock-paper-scissors match between a modelled human and a CPU.

    The human opponent uses fixed conditional-response probabilities; the
    CPU uses the probabilities passed to the constructor.  ``stat`` counts
    the round outcomes from the CPU player's point of view.
    """

    def __init__(self, wm, w0, tm, t0, lm, l0):
        """Create both players.

        Args:
            wm, w0, tm, t0, lm, l0: Conditional-response probabilities for
                the CPU player (see ``Player``).
        """
        # These parameters are taken from paper [1].
        self.human_player = Player(0.32, 0.45, 0.27, 0.36, 0.4, 0.34)
        self.cpu_player = Player(wm, w0, tm, t0, lm, l0)
        self.stat = {}

    def play(self):
        """Play one round and update both players and the tally."""
        human_hand = self.human_player.next_hand()
        cpu_hand = self.cpu_player.next_hand()
        self.human_player.judge(cpu_hand)
        self.cpu_player.judge(human_hand)
        # The CPU's parameters are the ones being tuned, so the tally must
        # record the CPU's outcome.  Judging (human, cpu) would count the
        # human's wins and make a higher score mean a weaker CPU.
        result = Result.judge(cpu_hand, human_hand)
        self.stat[result] = self.stat.get(result, 0) + 1

    def score(self, alpha=2):
        """Return ``wins * alpha + ties`` for the rounds played so far.

        Outcomes that have not occurred yet count as zero, so this is safe
        to call before any win or tie has been recorded.
        """
        wins = self.stat.get(Result.win, 0)
        ties = self.stat.get(Result.tie, 0)
        return wins * alpha + ties
def fitness(wm, w0, tm, t0, lm, l0):
    """Objective function: simulate a 1000-round match and return its score.

    The six arguments are forwarded unchanged to ``RPSGame``.  The return
    value is ``RPSGame.score`` evaluated with ``alpha=100``.
    """
    simulation = RPSGame(wm, w0, tm, t0, lm, l0)
    for _round in range(1000):
        simulation.play()
    return simulation.score(100)
# Names of the six probabilities handed to fitness(), in search order.
_PARAM_NAMES = ('wm', 'w0', 'tm', 't0', 'lm', 'l0')

# Every parameter is searched over the interval (0, 1).
bo = BayesianOptimization(
    fitness,
    {param: (0, 1) for param in _PARAM_NAMES},
)

# Hand-picked starting point with every parameter at 0.3.
# NOTE(review): explore() and res['max'] look like the pre-1.0 bayes_opt
# API -- confirm against the installed version.
bo.explore({param: [0.3] for param in _PARAM_NAMES})

bo.maximize(init_points=10, n_iter=10, kappa=2)
print(bo.res['max'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment