Skip to content

Instantly share code, notes, and snippets.

def run(self):
"""
Run the agent: play ``self.episodes`` episodes and collect the total
reward obtained in each episode, keyed by episode number.
"""
# Maps episode number -> total reward accumulated in that episode.
rewards = dict()
for episode in range(1, self.episodes + 1):
# The total reward for the current episode.
# NOTE(review): the remainder of the loop body is missing from this
# capture (snippet truncated) — confirm against the original gist.
def update_model(self, transition: Transition):
"""
Update the model weights from the outcome of one transition.

Hill-climbing style update: when the observed reward matches or beats
the best reward seen so far, the current weights are kept as the new
best and the step size ``alpha`` is shrunk (floored at 1e-2).

:param transition: state, action, reward, new_state, done
"""
if transition.reward >= self.best_reward:
self.best_reward = transition.reward
self.best_weights = np.copy(self.weights)
# Improving — anneal the step size, but never below 1e-2.
self.alpha = max(self.alpha / 1.5, 1e-2)
else:
# NOTE(review): the else branch is missing from this capture
# (snippet truncated) — confirm against the original gist.
def get_action(self, state):
    """
    Greedy action selection.

    Computes the linear value estimate of every action for the given
    state and returns the index of the best one.

    :param state: feature vector describing the current state.
    :return: index of the action with the highest value under
        ``self.weights``.
    """
    action_values = np.dot(state, self.weights)
    return np.argmax(action_values)
@wolfenfeld
wolfenfeld / mean_population_example.py
Last active January 31, 2020 12:59
mean population example
# Mean-population example: simulate per-individual rates
# alpha_i ~ N(mu, sigma^2) for a population of size t, then take a
# 70% random sample of the population without replacement.
t = 150       # population size
mu = 1.2      # population mean
sigma = 0.1   # population standard deviation

# Full population: scaled-and-shifted standard normals.
alpha_i = sigma * np.random.randn(t) + mu

# Uniform sample of 70% of the indices, no repeats.
amount_of_samples = int(0.7 * t)
sample_indices = np.random.choice(t, amount_of_samples, replace=False)
sampled_alpha_i = alpha_i[sample_indices]
@wolfenfeld
wolfenfeld / mean_fixed_time_example.py
Created January 27, 2020 20:02
mean fixed time example
# Mean-fixed-time example: n travelers each move for exactly 10 time
# units; the distances x_i are drawn from N(mu, sigma^2), so the speeds
# are v_i = x_i / t_i.
n = 30        # number of travelers
mu = 100      # mean distance
sigma = 30    # distance standard deviation

x_i = sigma * np.random.randn(n) + mu   # random distances
t_i = 10 * np.ones(n)                   # identical (fixed) travel times
v_i = x_i / t_i                         # per-traveler speeds

# Sample 70% of the travelers without replacement.
amount_of_samples = int(0.7 * n)
sample_indices = np.random.choice(n, amount_of_samples, replace=False)
@wolfenfeld
wolfenfeld / mean_fixed_distance_example.py
Created January 27, 2020 20:01
mean fixed distance example
# Mean-fixed-distance example: n travelers each cover exactly 1000
# distance units; the travel times t_i are drawn from N(mu, sigma^2),
# so the speeds are v_i = x_i / t_i.
n = 50        # number of travelers
mu = 100      # mean travel time
sigma = 40    # travel-time standard deviation

t_i = sigma * np.random.randn(n) + mu   # random travel times
x_i = 1000 * np.ones(n)                 # identical (fixed) distances
v_i = x_i / t_i                         # per-traveler speeds

# Sample 70% of the travelers without replacement.
amount_of_samples = int(0.7 * n)
sample_indices = np.random.choice(n, amount_of_samples, replace=False)
@wolfenfeld
wolfenfeld / mean_imports.py
Created January 27, 2020 19:59
mean imports
import numpy as np
from scipy.stats import gmean,hmean
@wolfenfeld
wolfenfeld / bandit_simulation.py
Last active May 9, 2019 09:36
Bandit Simulation
class BanditSimulation(object):
"""
Multi-armed bandit simulation harness that records per-step cumulative
regret for two algorithms, "Thompson Sampling" and "UCB".
"""
def __init__(self, k_arms, T, number_of_runs):
"""
:param k_arms: number of bandit arms.
:param T: horizon — number of time steps per run.
:param number_of_runs: number of independent simulation runs.
"""
self.T = T
self.number_of_runs = number_of_runs
self.k_arms = k_arms
# One (T x number_of_runs) regret matrix per algorithm name.
self.cumulative_regret = {"Thompson Sampling":np.zeros((T,number_of_runs)),
"UCB":np.zeros((T,number_of_runs))}
def run(self):
# NOTE(review): the body of run() is missing from this capture
# (snippet truncated) — confirm against the original gist.
class BernoulliArm(object):
    """A bandit arm that pays 1 with probability ``mu`` and 0 otherwise."""

    def __init__(self, mu):
        """
        :param mu: success probability of this arm.
        """
        self.mu = mu

    def draw(self):
        """Pull the arm once; return the Bernoulli(mu) reward (0 or 1)."""
        reward = np.random.binomial(n=1, p=self.mu)
        return reward
class BanditAlgorithm(object):
    """Abstract interface that a concrete bandit strategy implements."""

    def __init__(self, k_arms):
        """
        :param k_arms: number of arms the strategy chooses among.
        """
        pass

    def select_arm(self):
        """Return the index of the arm to pull next (override me)."""
        pass

    def update(self, chosen_arm, reward):
        """Record the reward observed for ``chosen_arm`` (override me)."""
        pass