Skip to content

Instantly share code, notes, and snippets.

@fbparis
Created July 10, 2020 06:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fbparis/dcda3f98b4c9351415388d5b2dba8c29 to your computer and use it in GitHub Desktop.
Save fbparis/dcda3f98b4c9351415388d5b2dba8c29 to your computer and use it in GitHub Desktop.
Very good K-armed (multi-armed) bandit solver -- no parameters needed!
class RandomUCI:
    """Random Upper Confidence Interval agent for a K-armed bandit.

    Each arm is scored as ``mean + u * sqrt(variance) / N`` where ``u`` is a
    single uniform draw in ``[0, 1)`` shared by all arms for that decision,
    and the arm with the highest score is played.

    The class expects ``numpy`` to be available as ``np`` in the enclosing
    module.

    Attributes:
        bandit: The environment. It must expose ``arm_count`` and
            ``get_reward_regret(arm)`` returning ``(reward, regret)``.
        arm_count: Number of arms, copied from ``bandit.arm_count``.
        mean: Running mean reward per arm, initialised to 0.5.
        variance: Despite the name, this is the running *sum of squared
            deviations* per arm (Welford's ``M2``), initialised to 0.25.
            It is never divided by the count when stored.
        N: Per-arm count, initialised to 1 (so it is one more than the
            number of real pulls of that arm).
    """

    def __init__(self, bandit, rng=None):
        """Set up per-arm statistics for ``bandit``.

        Args:
            bandit: Environment exposing ``arm_count`` and
                ``get_reward_regret(arm)``.
            rng: Optional random source used for the exploration draw. Any
                object with a zero-argument ``random()`` method returning a
                float in ``[0, 1)`` works (e.g. ``numpy.random.Generator``).
                Defaults to the global ``np.random`` state, which is what
                the agent used before this parameter existed.
        """
        self.bandit = bandit
        self.arm_count = bandit.arm_count
        self.mean = np.ones(self.arm_count) / 2
        self.variance = np.ones(self.arm_count) / 4
        self.N = np.ones(self.arm_count)
        self._rng = np.random if rng is None else rng

    @staticmethod
    def name():
        """Return the display name of this agent."""
        return 'Random-UCI'

    def get_reward_regret(self, arm):
        """Pull ``arm`` on the bandit, learn from the reward, and report it.

        Args:
            arm: Index of the arm to play.

        Returns:
            The ``(reward, regret)`` pair returned by the bandit.
        """
        reward, regret = self.bandit.get_reward_regret(arm)
        self._update_params(arm, reward)
        return reward, regret

    def get_action(self):
        """Return the index of the arm with the highest randomised score."""
        # sqrt(variance) / N == sqrt(variance / N) / sqrt(N): because
        # ``variance`` stores the sum of squared deviations, dividing its
        # root by N shrinks the bonus as an arm accumulates pulls.
        spread = np.sqrt(self.variance) / self.N
        # One scalar draw scales every arm's bonus for this decision.
        return np.argmax(self.mean + self._rng.random() * spread)

    def _update_params(self, arm, reward):
        """Fold ``reward`` into the statistics of ``arm`` (Welford update)."""
        self.N[arm] += 1
        delta = reward - self.mean[arm]
        self.mean[arm] += delta / self.N[arm]
        # Uses the deviation from the old mean times the deviation from the
        # new mean, which keeps the accumulated sum numerically stable.
        self.variance[arm] += delta * (reward - self.mean[arm])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment