Skip to content

Instantly share code, notes, and snippets.

@stockedge
Created February 19, 2017 10:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stockedge/d3d1c7a1b4be5907035ee2d3de4800a7 to your computer and use it in GitHub Desktop.
Save stockedge/d3d1c7a1b4be5907035ee2d3de4800a7 to your computer and use it in GitHub Desktop.
心理学的に最強のじゃんけんアルゴリズムをBayesian Optimizationで求める。
#[1] Wang, Zhijian, Bin Xu, and Hai-Jun Zhou. "Social cycling and conditional responses in the Rock-Paper-Scissors game." arXiv preprint arXiv:1404.5199 (2014).
from bayes_opt import BayesianOptimization
from enum import Enum
import random
class Hand(Enum):
    """The three throws of rock-paper-scissors.

    Members are numbered so that every hand is beaten by the one that
    follows it cyclically: rock (0) < paper (1) < sissors (2) < rock (0).
    A "counter-clockwise" shift moves one step forward along that cycle
    (to the hand that beats the current one); a "clockwise" shift moves one
    step backward (to the hand the current one beats).
    """

    rock = 0
    paper = 1
    sissors = 2  # (sic) spelling kept: the member name is part of the public interface

    @classmethod
    def get_first_hand(cls):
        """Return rock, paper or sissors, each with probability 1/3."""
        r = random.random()
        # Cumulative thresholds: each branch only needs its upper bound
        # because the earlier branches already consumed the lower range.
        if r < 1/3:
            return cls.rock
        if r < 2/3:
            return cls.paper
        return cls.sissors

    @classmethod
    def get_hand(cls, a, rm = 1/3, r0 = 1/3):
        """Pick the next hand as a random shift of the previous hand ``a``.

        Args:
            a: The hand played in the previous round.
            rm: Probability of a clockwise shift (``clockwise(a)``).
            r0: Probability of repeating ``a`` unchanged.

        Returns:
            ``clockwise(a)`` with probability ``rm``, ``a`` with probability
            ``r0``, and ``counter_clockwise(a)`` otherwise.  If
            ``rm + r0 >= 1`` the counter-clockwise branch is never taken.
        """
        r = random.random()
        # No separate lower-bound test: a draw exactly equal to ``rm`` must
        # land in the "repeat" band instead of falling through to the
        # final branch (this matters e.g. for rm == 0 and r == 0.0).
        if r < rm:
            return cls.clockwise(a)
        if r < rm + r0:
            return a
        return cls.counter_clockwise(a)

    @classmethod
    def counter_clockwise(cls, hand):
        """Return the hand that beats ``hand`` (rock -> paper -> sissors -> rock)."""
        return cls((hand.value + 1) % 3)

    @classmethod
    def clockwise(cls, hand):
        """Return the hand that ``hand`` beats (rock -> sissors -> paper -> rock)."""
        return cls((hand.value - 1) % 3)
class Result(Enum):
    """Outcome of a single round, expressed from the first player's side."""

    win = 3
    tie = 4
    loss = 5

    @classmethod
    def judge(cls, a, b):
        """Return the outcome of hand ``a`` played against hand ``b``.

        Args:
            a: The hand of the player whose outcome is reported.
            b: The opponent's hand.

        Returns:
            ``Result.tie`` when both hands are equal, ``Result.loss`` when
            ``b`` beats ``a``, and ``Result.win`` in every other case.
        """
        if a == b:
            return cls.tie
        # Every (mine, theirs) combination in which the first hand is beaten.
        losing_pairs = (
            (Hand.rock, Hand.paper),
            (Hand.paper, Hand.sissors),
            (Hand.sissors, Hand.rock),
        )
        for mine, theirs in losing_pairs:
            if a == mine and b == theirs:
                return cls.loss
        return cls.win
class Player:
    """A player whose next hand depends on the previous round's outcome.

    After a win, tie or loss the player shifts its previous hand using the
    probability pair (wm, w0), (tm, t0) or (lm, l0) respectively.  In each
    pair the first value is passed to ``Hand.get_hand`` as ``rm`` (clockwise
    shift probability) and the second as ``r0`` (repeat probability).
    """

    # Class-level defaults: a new player has no history until its first round.
    last_hand = None
    last_result = None

    def __init__(self, wm, w0, tm, t0, lm, l0):
        """Store the conditional-response probabilities.

        Args:
            wm, w0: Clockwise / repeat probabilities used after a win.
            tm, t0: Clockwise / repeat probabilities used after a tie.
            lm, l0: Clockwise / repeat probabilities used after a loss.
        """
        self.wm = wm
        self.w0 = w0
        self.tm = tm
        self.t0 = t0
        self.lm = lm
        self.l0 = l0

    def next_hand(self):
        """Choose, remember and return the hand for the coming round.

        Returns:
            A ``Hand`` member; the first hand is drawn by
            ``Hand.get_first_hand()``, later hands by ``Hand.get_hand``
            with the probability pair matching the last result.

        Raises:
            ValueError: If ``last_result`` is neither ``None`` nor a
                ``Result`` member.
        """
        if self.last_result is None:
            my_hand = Hand.get_first_hand()
        elif self.last_result == Result.win:
            my_hand = Hand.get_hand(self.last_hand, self.wm, self.w0)
        elif self.last_result == Result.tie:
            my_hand = Hand.get_hand(self.last_hand, self.tm, self.t0)
        elif self.last_result == Result.loss:
            my_hand = Hand.get_hand(self.last_hand, self.lm, self.l0)
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError(
                "unexpected last_result: {!r}".format(self.last_result))
        self.last_hand = my_hand
        return my_hand

    def judge(self, enemy_hand):
        """Record the outcome of the hand just played against ``enemy_hand``."""
        self.last_result = Result.judge(self.last_hand, enemy_hand)
class RPSGame:
    """A repeated game between a fixed "human" model and a tunable CPU player.

    ``stat`` maps each ``Result`` member to the number of rounds that ended
    with that outcome, counted from the CPU player's point of view.
    """

    def __init__(self, wm, w0, tm, t0, lm, l0):
        """Create both players.

        Args:
            wm, w0, tm, t0, lm, l0: Conditional-response probabilities
                forwarded unchanged to the CPU ``Player``.
        """
        # The human model's parameters were taken from paper [1].
        self.human_player = Player(0.32, 0.45, 0.27, 0.36, 0.4, 0.34)
        self.cpu_player = Player(wm, w0, tm, t0, lm, l0)
        self.stat = {}

    def play(self):
        """Play one round and add its outcome to ``stat``."""
        human_hand = self.human_player.next_hand()
        cpu_hand = self.cpu_player.next_hand()
        self.human_player.judge(cpu_hand)
        self.cpu_player.judge(human_hand)
        # Tally from the CPU's side: the CPU parameters are what gets scored,
        # so a "win" here must mean the CPU beat the human, not the reverse.
        result = Result.judge(cpu_hand, human_hand)
        self.stat[result] = self.stat.get(result, 0) + 1

    def score(self, alpha = 2):
        """Return ``wins * alpha + ties`` for the rounds played so far.

        Args:
            alpha: Weight of a win relative to a tie.

        Returns:
            The weighted score; outcomes that never occurred count as zero
            (so the score is 0 before any round has been played).
        """
        wins = self.stat.get(Result.win, 0)
        ties = self.stat.get(Result.tie, 0)
        return wins * alpha + ties
def fitness(wm, w0, tm, t0, lm, l0):
    """Objective function: score of a 1000-round game for the given parameters.

    Args:
        wm, w0, tm, t0, lm, l0: Values forwarded unchanged to ``RPSGame``.

    Returns:
        ``RPSGame.score(100)`` after 1000 calls to ``RPSGame.play()``.
        The rounds are random, so repeated calls with the same arguments
        generally return different values.
    """
    match = RPSGame(wm, w0, tm, t0, lm, l0)
    for _ in range(1000):
        match.play()
    return match.score(100)
# Optimise `fitness` over its six arguments, each restricted to the range (0, 1).
bo = BayesianOptimization(
    fitness,
    {
        'wm': (0, 1), 'w0': (0, 1),
        'tm': (0, 1), 't0': (0, 1),
        'lm': (0, 1), 'l0': (0, 1),
    },
)

# Hand-picked starting point: every parameter set to 0.3.
bo.explore({
    'wm': [0.3], 'w0': [0.3],
    'tm': [0.3], 't0': [0.3],
    'lm': [0.3], 'l0': [0.3],
})

bo.maximize(init_points=10, n_iter=10, kappa=2)

# Report the 'max' entry of the optimiser's results.
print(bo.res['max'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment