Skip to content

Instantly share code, notes, and snippets.

def run(self):
"""
Run the agent: play ``self.episodes`` episodes and collect the total
reward obtained in each episode, keyed by episode number.
"""
# Maps episode number -> total reward accumulated in that episode.
rewards = dict()
for episode in range(1, self.episodes + 1):
# The total reward for the current episode.
# NOTE(review): the remainder of the loop body is missing from this
# capture (snippet truncated) — confirm against the original gist.
def update_model(self, transition: Transition):
"""
Update the model weights from the outcome of one transition.

Hill-climbing style update: when the observed reward matches or beats
the best reward seen so far, the current weights are kept as the new
best and the step size ``alpha`` is shrunk (floored at 1e-2).

:param transition: state, action, reward, new_state, done
"""
if transition.reward >= self.best_reward:
self.best_reward = transition.reward
self.best_weights = np.copy(self.weights)
# Improving — anneal the step size, but never below 1e-2.
self.alpha = max(self.alpha / 1.5, 1e-2)
else:
# NOTE(review): the else branch is missing from this capture
# (snippet truncated) — confirm against the original gist.
def get_action(self, state):
    """
    Greedy action selection.

    Computes the linear value estimate of every action for the given
    state and returns the index of the best one.

    :param state: feature vector describing the current state.
    :return: index of the action with the highest value under
        ``self.weights``.
    """
    action_values = np.dot(state, self.weights)
    return np.argmax(action_values)
@wolfenfeld
wolfenfeld / mean_population_example.py
Last active January 31, 2020 12:59
mean population example
# Mean-population example: simulate per-individual rates
# alpha_i ~ N(mu, sigma^2) for a population of size t, then take a
# 70% random sample of the population without replacement.
t = 150       # population size
mu = 1.2      # population mean
sigma = 0.1   # population standard deviation

# Full population: scaled-and-shifted standard normals.
alpha_i = sigma * np.random.randn(t) + mu

# Uniform sample of 70% of the indices, no repeats.
amount_of_samples = int(0.7 * t)
sample_indices = np.random.choice(t, amount_of_samples, replace=False)
sampled_alpha_i = alpha_i[sample_indices]
@wolfenfeld
wolfenfeld / mean_fixed_time_example.py
Created January 27, 2020 20:02
mean fixed time example
# Mean-fixed-time example: n travelers each move for exactly 10 time
# units; the distances x_i are drawn from N(mu, sigma^2), so the speeds
# are v_i = x_i / t_i.
n = 30        # number of travelers
mu = 100      # mean distance
sigma = 30    # distance standard deviation

x_i = sigma * np.random.randn(n) + mu   # random distances
t_i = 10 * np.ones(n)                   # identical (fixed) travel times
v_i = x_i / t_i                         # per-traveler speeds

# Sample 70% of the travelers without replacement.
amount_of_samples = int(0.7 * n)
sample_indices = np.random.choice(n, amount_of_samples, replace=False)
@wolfenfeld
wolfenfeld / mean_fixed_distance_example.py
Created January 27, 2020 20:01
mean fixed distance example
# Mean-fixed-distance example: n travelers each cover exactly 1000
# distance units; the travel times t_i are drawn from N(mu, sigma^2),
# so the speeds are v_i = x_i / t_i.
n = 50        # number of travelers
mu = 100      # mean travel time
sigma = 40    # travel-time standard deviation

t_i = sigma * np.random.randn(n) + mu   # random travel times
x_i = 1000 * np.ones(n)                 # identical (fixed) distances
v_i = x_i / t_i                         # per-traveler speeds

# Sample 70% of the travelers without replacement.
amount_of_samples = int(0.7 * n)
sample_indices = np.random.choice(n, amount_of_samples, replace=False)
@wolfenfeld
wolfenfeld / mean_imports.py
Created January 27, 2020 19:59
mean imports
import numpy as np
from scipy.stats import gmean,hmean
@wolfenfeld
wolfenfeld / bandit_simulation.py
Last active May 9, 2019 09:36
Bandit Simulation
class BanditSimulation(object):
"""
Multi-armed bandit simulation harness that records per-step cumulative
regret for two algorithms, "Thompson Sampling" and "UCB".
"""
def __init__(self, k_arms, T, number_of_runs):
"""
:param k_arms: number of bandit arms.
:param T: horizon — number of time steps per run.
:param number_of_runs: number of independent simulation runs.
"""
self.T = T
self.number_of_runs = number_of_runs
self.k_arms = k_arms
# One (T x number_of_runs) regret matrix per algorithm name.
self.cumulative_regret = {"Thompson Sampling":np.zeros((T,number_of_runs)),
"UCB":np.zeros((T,number_of_runs))}
def run(self):
# NOTE(review): the body of run() is missing from this capture
# (snippet truncated) — confirm against the original gist.
class BernoulliArm(object):
    """A bandit arm that pays 1 with probability ``mu`` and 0 otherwise."""

    def __init__(self, mu):
        """
        :param mu: success probability of this arm.
        """
        self.mu = mu

    def draw(self):
        """Pull the arm once; return the Bernoulli(mu) reward (0 or 1)."""
        reward = np.random.binomial(n=1, p=self.mu)
        return reward
class BanditAlgorithm(object):
    """Abstract interface that a concrete bandit strategy implements."""

    def __init__(self, k_arms):
        """
        :param k_arms: number of arms the strategy chooses among.
        """
        pass

    def select_arm(self):
        """Return the index of the arm to pull next (override me)."""
        pass

    def update(self, chosen_arm, reward):
        """Record the reward observed for ``chosen_arm`` (override me)."""
        pass