Skip to content

Instantly share code, notes, and snippets.

@nums11
nums11 / test_agents.py
Created February 18, 2023 22:58
Test UCB Agent
from Agents import UCBAgent
import matplotlib.pyplot as plt
from NonStationaryBanditEnv import NonStationaryBanditEnv
import numpy as np
env = NonStationaryBanditEnv()
def testUCBAgent(c):
ucb_agent = UCBAgent(env, 0.1, c)
num_episodes = 10000
@nums11
nums11 / Agents.py
Created February 18, 2023 22:54
Agents with UCB
import random
import numpy as np
class Action(object):
"""
Implementation of an action.
optimistic: determines whether or not Q is initialized optimistically
"""
def __init__(self, optimistic=False):
@nums11
nums11 / Agents.py
Created February 18, 2023 02:25
Agents With Constant Alpha
import random
class Action(object):
"""
Implementation of an action.
optimistic: determines whether or not Q is initialized optimistically
"""
def __init__(self, optimistic=False):
# Current value estimation for this action
@nums11
nums11 / NonStationaryBanditEnv.py
Created February 18, 2023 01:50
NonStationaryBanditEnv
import gym
from gym import Env
from gym.spaces import Discrete
import random
class NonStationaryBanditEnv(Env):
"""
Non-Stationary 5-arm Bandit Environment
At any given time, the best action returns a reward of 1
@nums11
nums11 / test_agents.py
Created February 17, 2023 00:50
Test Multi-Armed Bandits Agents
import gym
import gym_bandits
from Agents import GreedyAgent, EpsilonGreedyAgent
import matplotlib.pyplot as plt
# env = gym.make("BanditTwoArmedDeterministicFixed-v0")
env = gym.make("BanditTenArmedUniformDistributedReward-v0")
greedy_agent = GreedyAgent(env)
e_greedy_agent = EpsilonGreedyAgent(env, 0.1)
@nums11
nums11 / Agents.py
Created February 16, 2023 23:56
Python Implementation of Greedy & Epsilon-Greedy Agents
import random
class Action(object):
"""
Implementation of an action.
optimistic: determines whether or not Q is initialized optimistically
"""
def __init__(self, optimistic=False):
# Current value estimation for this action