Skip to content

Instantly share code, notes, and snippets.

@comckay
Created November 13, 2020 16:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save comckay/f1ff32cf964491b537c39a14674edf0d to your computer and use it in GitHub Desktop.
Save comckay/f1ff32cf964491b537c39a14674edf0d to your computer and use it in GitHub Desktop.
from typing import List
import random
import numpy as np
class EGreedy:
    """Epsilon-greedy selector over a fixed list of named models.

    Every model is first handed out once, in list order.  After that, each
    call to :meth:`select_model` returns a uniformly random model with
    probability ``epsilon`` and otherwise the model with the highest observed
    ``successes / tries`` ratio.  Successes are recorded by the caller through
    :meth:`reward_model`.

    Attributes:
        models: Model names, in the order supplied to the constructor.
        epsilon: Probability of picking a random model instead of the best one.
        model_successes: Per-model count of rewards, aligned with ``models``.
        model_tries: Per-model count of selections, aligned with ``models``.
    """

    def __init__(self, models: List[str], epsilon: float = 0.1):
        """Create a selector with zeroed counters for every model.

        Args:
            models: Names of the candidate models.
            epsilon: Exploration probability used by the greedy phase.
        """
        # Copy the names: the counter arrays below are sized once, so a later
        # mutation of the caller's list must not change ``self.models``.
        self.models = list(models)
        n_models = len(self.models)
        self.epsilon = epsilon
        self.model_successes = np.zeros(n_models)
        self.model_tries = np.zeros(n_models)

    def _increment_model_tries(self, model: str) -> None:
        """Add one try to ``model`` (first occurrence of the name).

        Raises:
            ValueError: If ``model`` is not in ``self.models``.
        """
        self.model_tries[self.models.index(model)] += 1

    def _epsilon_greedy_selection(self) -> str:
        """Return a random model with probability ``epsilon``, else the best.

        "Best" is the model with the highest ``successes / tries`` ratio;
        NaN ratios (0 / 0) are ignored and ties go to the earliest model.
        """
        if random.random() < self.epsilon:
            return random.choice(self.models)

        # A zero try count yields NaN or inf here; nanargmax deals with NaN,
        # so only the NumPy warnings are silenced, not the values.
        with np.errstate(divide="ignore", invalid="ignore"):
            success_rates = self.model_successes / self.model_tries
        return self.models[np.nanargmax(success_rates)]

    def select_model(self) -> str:
        """Pick the next model to try and record the try.

        Returns:
            The name of the selected model.

        Raises:
            ValueError: If the selector was created with no models.
        """
        if not self.models:
            raise ValueError("no models to select from")

        untested_models = np.nonzero(self.model_tries == 0)[0]
        if untested_models.size:
            # Count the try by position rather than by name: with duplicate
            # names a name lookup always hits the first occurrence, which
            # would leave the later slot untested forever.
            untested_index = untested_models[0]
            self.model_tries[untested_index] += 1
            return self.models[untested_index]

        selected_model = self._epsilon_greedy_selection()
        self._increment_model_tries(selected_model)
        return selected_model

    def reward_model(self, model: str) -> None:
        """Record one success for ``model``.

        Raises:
            ValueError: If ``model`` is not one of the known models.
        """
        if model not in self.models:
            raise ValueError(f"model {model} not recognized")
        model_index = self.models.index(model)
        self.model_successes[model_index] += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment