Skip to content

Instantly share code, notes, and snippets.

@comckay
Last active September 28, 2021 13:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save comckay/42bab0272cd4a353f170e881da56cbd5 to your computer and use it in GitHub Desktop.
Save comckay/42bab0272cd4a353f170e881da56cbd5 to your computer and use it in GitHub Desktop.
from typing import List
import numpy as np
class LinUCB:
    """Disjoint linear UCB contextual bandit over a fixed list of named models.

    Each model (arm) keeps its own ``n_features x n_features`` design matrix,
    initialised to the identity, and its own reward-weighted context sum.
    The design matrix is updated when an arm is *selected*; the reward vector
    is updated only when :meth:`reward_model` is called, so a selection that
    is never rewarded contributes a zero reward to that arm's estimate.

    Contexts may be passed as 1-D arrays, row vectors or column vectors; they
    are flattened internally to a vector of length ``n_features``.
    """

    def __init__(self, models: List[str], n_features: int, alpha: float):
        """Create a bandit with one arm per entry of ``models``.

        Args:
            models: Names of the arms; returned by :meth:`select_model`.
            n_features: Length of every context vector.
            alpha: Multiplier on the confidence-width term (exploration).
        """
        self.models, self.n_models = models, len(models)
        self.alpha = alpha
        # Number of times each arm has been selected.
        self.model_tries = np.zeros((self.n_models))
        # One identity-initialised design matrix per arm: (n_models, d, d).
        self.covariance_matrices = np.tile(
            np.identity(n_features), (self.n_models, 1, 1)
        )
        # One reward-weighted context sum per arm: (n_models, d).
        self.reward_matrix = np.zeros((self.n_models, n_features))

    def _as_feature_vector(self, context: np.ndarray) -> np.ndarray:
        """Return ``context`` flattened to 1-D, validating its length.

        Raises:
            ValueError: If ``context`` does not hold exactly ``n_features``
                values.
        """
        vector = np.asarray(context, dtype=float).ravel()
        n_features = self.reward_matrix.shape[1]
        if vector.size != n_features:
            raise ValueError(
                f"context has {vector.size} features, expected {n_features}"
            )
        return vector

    def _increment_model_tries(self, model: str) -> None:
        """Record one more selection of ``model``."""
        self.model_tries[self.models.index(model)] += 1

    def _update_covariance_matrix(self, model: str, context: np.ndarray) -> None:
        """Add the rank-one term ``x xᵀ`` to the design matrix of ``model``."""
        vector = self._as_feature_vector(context)
        model_index = self.models.index(model)
        # np.outer, not np.dot: for a 1-D vector np.dot(x, x.T) is the scalar
        # inner product, which would be broadcast onto every matrix entry.
        self.covariance_matrices[model_index] += np.outer(vector, vector)

    def _get_model_with_max_ucb(self, context: np.ndarray) -> str:
        """Return the model whose upper confidence bound is largest.

        For each arm the score is ``thetaᵀx + alpha * sqrt(xᵀ A⁻¹ x)`` with
        ``theta = A⁻¹ b``. NaN scores are ignored when taking the maximum.
        """
        vector = self._as_feature_vector(context)
        # np.linalg.inv inverts the whole stack of per-arm matrices at once.
        inverse_covariance_matrices = np.linalg.inv(self.covariance_matrices)
        linucb_estimates = []
        for inverse_matrix, reward_vector in zip(
            inverse_covariance_matrices, self.reward_matrix
        ):
            arm_coefficients = inverse_matrix @ reward_vector
            pointwise_estimate = arm_coefficients @ vector
            upper_bound = self.alpha * np.sqrt(vector @ inverse_matrix @ vector)
            linucb_estimates.append(pointwise_estimate + upper_bound)
        return self.models[int(np.nanargmax(linucb_estimates))]

    def select_model(self, context: np.ndarray) -> str:
        """Choose a model for ``context`` and record the selection.

        Every model is tried once, in list order, before UCB scores are used.
        The chosen model's try counter and design matrix are updated before
        returning.

        Raises:
            ValueError: If ``context`` does not hold ``n_features`` values.
        """
        # Validate before touching any state so a bad context changes nothing.
        vector = self._as_feature_vector(context)
        untested_models = np.nonzero(self.model_tries == 0)[0]
        if untested_models.size == 0:
            chosen_model = self._get_model_with_max_ucb(vector)
        else:
            chosen_model = self.models[untested_models[0]]
        self._increment_model_tries(chosen_model)
        self._update_covariance_matrix(chosen_model, vector)
        return chosen_model

    def reward_model(
        self, model: str, context: np.ndarray, reward: float = 1.0
    ) -> None:
        """Credit ``model`` with ``reward`` for the given ``context``.

        Adds ``reward * context`` to the model's reward vector.

        Raises:
            ValueError: If ``model`` is not one of the known models, or if
                ``context`` does not hold ``n_features`` values.
        """
        if model not in self.models:
            raise ValueError(f"model {model} not recognized")
        vector = self._as_feature_vector(context)
        model_index = self.models.index(model)
        self.reward_matrix[model_index] += reward * vector
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment