stockedge/StrongestRPS.py

## StrongestRPS.py
#[1] Wang, Zhijian, Bin Xu, and Hai-Jun Zhou. "Social cycling and conditional responses in the Rock-Paper-Scissors game." arXiv preprint arXiv:1404.5199 (2014).
from bayes_opt import BayesianOptimization
from enum import Enum
import random

class Hand(Enum):
    """A throw in Rock-Paper-Scissors.

    Members are numbered so that the cycle rock -> paper -> sissors -> rock
    is a step of +1 modulo 3; ``counter_clockwise`` walks the cycle in that
    direction and ``clockwise`` walks it backwards.
    """

    rock = 0
    paper = 1
    sissors = 2  # (sic) spelling kept: callers refer to the member by this name

    @classmethod
    def get_first_hand(cls):
        """Return a hand drawn uniformly at random for the opening round."""
        r = random.random()
        if r < 1/3:
            return cls.rock
        # The lower bound is already implied by the failed test above, so it
        # is not re-checked.
        if r < 2/3:
            return cls.paper
        return cls.sissors

    @classmethod
    def get_hand(cls, a, rm=1/3, r0=1/3):
        """Pick the next hand relative to the previously played hand ``a``.

        Args:
            a: The hand played in the previous round.
            rm: Probability of moving to ``clockwise(a)``.
            r0: Probability of repeating ``a``.

        Returns:
            ``clockwise(a)`` with probability ``rm``, ``a`` with probability
            ``r0``, and ``counter_clockwise(a)`` otherwise. When
            ``rm + r0 >= 1`` the counter-clockwise branch is never taken and
            the repeat probability is effectively ``1 - rm``.
        """
        r = random.random()
        if r < rm:
            return cls.clockwise(a)
        # Half-open intervals: a draw exactly equal to ``rm`` (e.g. 0.0 when
        # rm is 0) belongs to the "repeat" band instead of falling through
        # to the counter-clockwise branch.
        if r < rm + r0:
            return a
        return cls.counter_clockwise(a)

    @classmethod
    def counter_clockwise(cls, hand):
        """Return the hand one step forward in the cycle.

        rock -> paper, paper -> sissors, sissors -> rock.

        Args:
            hand: A ``Hand`` member.
        """
        return cls((hand.value + 1) % len(cls))

    @classmethod
    def clockwise(cls, hand):
        """Return the hand one step backward in the cycle.

        rock -> sissors, paper -> rock, sissors -> paper.

        Args:
            hand: A ``Hand`` member.
        """
        return cls((hand.value - 1) % len(cls))

class Result(Enum):
    """Outcome of a single round, seen from the first player's side."""

    win = 3
    tie = 4
    loss = 5

    @classmethod
    def judge(cls, a, b):
        """Decide how hand ``a`` fares against hand ``b``.

        Args:
            a: The hand of the player being judged.
            b: The opponent's hand.

        Returns:
            ``tie`` when both hands are equal, ``loss`` when ``b`` is the
            hand that beats ``a``, and ``win`` in every other case.
        """
        if a == b:
            return cls.tie
        # Each entry is (losing hand, hand that beats it).
        beaten_by = (
            (Hand.rock, Hand.paper),
            (Hand.paper, Hand.sissors),
            (Hand.sissors, Hand.rock),
        )
        a_loses = any(a == mine and b == theirs for mine, theirs in beaten_by)
        return cls.loss if a_loses else cls.win

class Player():
    """A player whose next hand is conditioned on the previous round.

    After each kind of outcome the player shifts clockwise, repeats, or
    shifts counter-clockwise relative to the last hand, using a pair of
    probabilities specific to that outcome.
    """

    # Defaults before any round has been played.
    last_hand = None
    last_result = None

    def __init__(self, wm, w0, tm, t0, lm, l0):
        """Store the conditional-response probabilities.

        Args:
            wm: After a win, probability of shifting clockwise.
            w0: After a win, probability of repeating the hand.
            tm: After a tie, probability of shifting clockwise.
            t0: After a tie, probability of repeating the hand.
            lm: After a loss, probability of shifting clockwise.
            l0: After a loss, probability of repeating the hand.
        """
        self.wm = wm
        self.w0 = w0
        self.tm = tm
        self.t0 = t0
        self.lm = lm
        self.l0 = l0

    def next_hand(self):
        """Choose, remember and return the hand for the next round.

        Returns:
            A random opening hand when no result has been recorded yet,
            otherwise a hand drawn by ``Hand.get_hand`` with the
            probabilities matching the last result.

        Raises:
            ValueError: If ``last_result`` is neither ``None`` nor a
                ``Result`` member.
        """
        outcome = self.last_result
        if outcome is None:
            my_hand = Hand.get_first_hand()
        elif outcome == Result.win:
            my_hand = Hand.get_hand(self.last_hand, self.wm, self.w0)
        elif outcome == Result.tie:
            my_hand = Hand.get_hand(self.last_hand, self.tm, self.t0)
        elif outcome == Result.loss:
            my_hand = Hand.get_hand(self.last_hand, self.lm, self.l0)
        else:
            # Previously this fell through and failed with UnboundLocalError.
            raise ValueError("unexpected last_result: %r" % (outcome,))
        self.last_hand = my_hand
        return my_hand

    def judge(self, enemy_hand):
        """Record this player's result of the last hand against ``enemy_hand``."""
        self.last_result = Result.judge(self.last_hand, enemy_hand)

class RPSGame():
    """Repeated Rock-Paper-Scissors between a fixed "human" model and a CPU.

    The human player uses fixed response probabilities; the CPU player uses
    the probabilities passed to the constructor.
    """

    def __init__(self, wm, w0, tm, t0, lm, l0):
        """Create both players and an empty tally.

        Args:
            wm, w0, tm, t0, lm, l0: Response probabilities handed to the
                CPU ``Player`` unchanged.
        """
        # These parameters were taken from paper [1].
        self.human_player = Player(0.32, 0.45, 0.27, 0.36, 0.4, 0.34)
        self.cpu_player = Player(wm, w0, tm, t0, lm, l0)
        # Maps a Result to the number of rounds that ended with it.
        self.stat = {}

    def play(self):
        """Play one round and add its outcome to ``stat``."""
        human_hand = self.human_player.next_hand()
        cpu_hand = self.cpu_player.next_hand()
        self.human_player.judge(cpu_hand)
        self.cpu_player.judge(human_hand)
        # Tally from the CPU's side. The CPU is the player whose parameters
        # are tuned, so counting the human's wins (as before) made a higher
        # score mean a weaker CPU.
        result = Result.judge(cpu_hand, human_hand)
        self.stat[result] = self.stat.get(result, 0) + 1

    def score(self, alpha = 2):
        """Return ``alpha * wins + ties`` over the rounds played so far.

        Outcomes that never occurred count as zero, so the method is safe
        to call before any round or when no win/tie was recorded.

        Args:
            alpha: Weight of a win relative to a tie.
        """
        wins = self.stat.get(Result.win, 0)
        ties = self.stat.get(Result.tie, 0)
        return wins * alpha + ties

def fitness(wm, w0, tm, t0, lm, l0, *, n_games=1000, alpha=100):
    """Objective function: simulate a match and return its score.

    Args:
        wm, w0, tm, t0, lm, l0: Response probabilities passed to ``RPSGame``.
        n_games: Number of rounds to simulate (keyword-only).
        alpha: Win weight passed to ``RPSGame.score`` (keyword-only).

    Returns:
        ``game.score(alpha)`` after ``n_games`` rounds. The rounds are
        random, so repeated calls with the same arguments can differ.
    """
    game = RPSGame(wm, w0, tm, t0, lm, l0)
    for _ in range(n_games):
        game.play()
    return game.score(alpha)

# Every response probability is searched over the interval (0, 1).
bo = BayesianOptimization(
    fitness,
    {key: (0, 1) for key in ('wm', 'w0', 'tm', 't0', 'lm', 'l0')},
)
# Ask the optimizer to explore the point where every probability is 0.3.
bo.explore({key: [0.3] for key in ('wm', 'w0', 'tm', 't0', 'lm', 'l0')})
bo.maximize(init_points=10, n_iter=10, kappa=2)

print(bo.res['max'])
	#[1] Wang, Zhijian, Bin Xu, and Hai-Jun Zhou. "Social cycling and conditional responses in the Rock-Paper-Scissors game." arXiv preprint arXiv:1404.5199 (2014).
	from bayes_opt import BayesianOptimization
	from enum import Enum
	import random

	class Hand(Enum):
	    """A throw in Rock-Paper-Scissors.

	    Members are numbered so that the cycle rock -> paper -> sissors -> rock
	    is a step of +1 modulo 3; ``counter_clockwise`` walks the cycle in that
	    direction and ``clockwise`` walks it backwards.
	    """

	    rock = 0
	    paper = 1
	    sissors = 2  # (sic) spelling kept: callers refer to the member by this name

	    @classmethod
	    def get_first_hand(cls):
	        """Return a hand drawn uniformly at random for the opening round."""
	        r = random.random()
	        if r < 1/3:
	            return cls.rock
	        # The lower bound is already implied by the failed test above, so it
	        # is not re-checked.
	        if r < 2/3:
	            return cls.paper
	        return cls.sissors

	    @classmethod
	    def get_hand(cls, a, rm=1/3, r0=1/3):
	        """Pick the next hand relative to the previously played hand ``a``.

	        Args:
	            a: The hand played in the previous round.
	            rm: Probability of moving to ``clockwise(a)``.
	            r0: Probability of repeating ``a``.

	        Returns:
	            ``clockwise(a)`` with probability ``rm``, ``a`` with probability
	            ``r0``, and ``counter_clockwise(a)`` otherwise. When
	            ``rm + r0 >= 1`` the counter-clockwise branch is never taken and
	            the repeat probability is effectively ``1 - rm``.
	        """
	        r = random.random()
	        if r < rm:
	            return cls.clockwise(a)
	        # Half-open intervals: a draw exactly equal to ``rm`` (e.g. 0.0 when
	        # rm is 0) belongs to the "repeat" band instead of falling through
	        # to the counter-clockwise branch.
	        if r < rm + r0:
	            return a
	        return cls.counter_clockwise(a)

	    @classmethod
	    def counter_clockwise(cls, hand):
	        """Return the hand one step forward in the cycle.

	        rock -> paper, paper -> sissors, sissors -> rock.

	        Args:
	            hand: A ``Hand`` member.
	        """
	        return cls((hand.value + 1) % len(cls))

	    @classmethod
	    def clockwise(cls, hand):
	        """Return the hand one step backward in the cycle.

	        rock -> sissors, paper -> rock, sissors -> paper.

	        Args:
	            hand: A ``Hand`` member.
	        """
	        return cls((hand.value - 1) % len(cls))

	class Result(Enum):
	    """Outcome of a single round, seen from the first player's side."""

	    win = 3
	    tie = 4
	    loss = 5

	    @classmethod
	    def judge(cls, a, b):
	        """Decide how hand ``a`` fares against hand ``b``.

	        Args:
	            a: The hand of the player being judged.
	            b: The opponent's hand.

	        Returns:
	            ``tie`` when both hands are equal, ``loss`` when ``b`` is the
	            hand that beats ``a``, and ``win`` in every other case.
	        """
	        if a == b:
	            return cls.tie
	        # Each entry is (losing hand, hand that beats it).
	        beaten_by = (
	            (Hand.rock, Hand.paper),
	            (Hand.paper, Hand.sissors),
	            (Hand.sissors, Hand.rock),
	        )
	        a_loses = any(a == mine and b == theirs for mine, theirs in beaten_by)
	        return cls.loss if a_loses else cls.win

	class Player():
	    """A player whose next hand is conditioned on the previous round.

	    After each kind of outcome the player shifts clockwise, repeats, or
	    shifts counter-clockwise relative to the last hand, using a pair of
	    probabilities specific to that outcome.
	    """

	    # Defaults before any round has been played.
	    last_hand = None
	    last_result = None

	    def __init__(self, wm, w0, tm, t0, lm, l0):
	        """Store the conditional-response probabilities.

	        Args:
	            wm: After a win, probability of shifting clockwise.
	            w0: After a win, probability of repeating the hand.
	            tm: After a tie, probability of shifting clockwise.
	            t0: After a tie, probability of repeating the hand.
	            lm: After a loss, probability of shifting clockwise.
	            l0: After a loss, probability of repeating the hand.
	        """
	        self.wm = wm
	        self.w0 = w0
	        self.tm = tm
	        self.t0 = t0
	        self.lm = lm
	        self.l0 = l0

	    def next_hand(self):
	        """Choose, remember and return the hand for the next round.

	        Returns:
	            A random opening hand when no result has been recorded yet,
	            otherwise a hand drawn by ``Hand.get_hand`` with the
	            probabilities matching the last result.

	        Raises:
	            ValueError: If ``last_result`` is neither ``None`` nor a
	                ``Result`` member.
	        """
	        outcome = self.last_result
	        if outcome is None:
	            my_hand = Hand.get_first_hand()
	        elif outcome == Result.win:
	            my_hand = Hand.get_hand(self.last_hand, self.wm, self.w0)
	        elif outcome == Result.tie:
	            my_hand = Hand.get_hand(self.last_hand, self.tm, self.t0)
	        elif outcome == Result.loss:
	            my_hand = Hand.get_hand(self.last_hand, self.lm, self.l0)
	        else:
	            # Previously this fell through and failed with UnboundLocalError.
	            raise ValueError("unexpected last_result: %r" % (outcome,))
	        self.last_hand = my_hand
	        return my_hand

	    def judge(self, enemy_hand):
	        """Record this player's result of the last hand against ``enemy_hand``."""
	        self.last_result = Result.judge(self.last_hand, enemy_hand)

	class RPSGame():
	    """Repeated Rock-Paper-Scissors between a fixed "human" model and a CPU.

	    The human player uses fixed response probabilities; the CPU player uses
	    the probabilities passed to the constructor.
	    """

	    def __init__(self, wm, w0, tm, t0, lm, l0):
	        """Create both players and an empty tally.

	        Args:
	            wm, w0, tm, t0, lm, l0: Response probabilities handed to the
	                CPU ``Player`` unchanged.
	        """
	        # These parameters were taken from paper [1].
	        self.human_player = Player(0.32, 0.45, 0.27, 0.36, 0.4, 0.34)
	        self.cpu_player = Player(wm, w0, tm, t0, lm, l0)
	        # Maps a Result to the number of rounds that ended with it.
	        self.stat = {}

	    def play(self):
	        """Play one round and add its outcome to ``stat``."""
	        human_hand = self.human_player.next_hand()
	        cpu_hand = self.cpu_player.next_hand()
	        self.human_player.judge(cpu_hand)
	        self.cpu_player.judge(human_hand)
	        # Tally from the CPU's side. The CPU is the player whose parameters
	        # are tuned, so counting the human's wins (as before) made a higher
	        # score mean a weaker CPU.
	        result = Result.judge(cpu_hand, human_hand)
	        self.stat[result] = self.stat.get(result, 0) + 1

	    def score(self, alpha = 2):
	        """Return ``alpha * wins + ties`` over the rounds played so far.

	        Outcomes that never occurred count as zero, so the method is safe
	        to call before any round or when no win/tie was recorded.

	        Args:
	            alpha: Weight of a win relative to a tie.
	        """
	        wins = self.stat.get(Result.win, 0)
	        ties = self.stat.get(Result.tie, 0)
	        return wins * alpha + ties

	def fitness(wm, w0, tm, t0, lm, l0, *, n_games=1000, alpha=100):
	    """Objective function: simulate a match and return its score.

	    Args:
	        wm, w0, tm, t0, lm, l0: Response probabilities passed to ``RPSGame``.
	        n_games: Number of rounds to simulate (keyword-only).
	        alpha: Win weight passed to ``RPSGame.score`` (keyword-only).

	    Returns:
	        ``game.score(alpha)`` after ``n_games`` rounds. The rounds are
	        random, so repeated calls with the same arguments can differ.
	    """
	    game = RPSGame(wm, w0, tm, t0, lm, l0)
	    for _ in range(n_games):
	        game.play()
	    return game.score(alpha)

	# Every response probability is searched over the interval (0, 1).
	bo = BayesianOptimization(
	    fitness,
	    {key: (0, 1) for key in ('wm', 'w0', 'tm', 't0', 'lm', 'l0')},
	)
	# Ask the optimizer to explore the point where every probability is 0.3.
	bo.explore({key: [0.3] for key in ('wm', 'w0', 'tm', 't0', 'lm', 'l0')})
	bo.maximize(init_points=10, n_iter=10, kappa=2)

	print(bo.res['max'])