Skip to content

Instantly share code, notes, and snippets.

@nums11
nums11 / Agents.py
Created February 16, 2023 23:56
Python Implementation of Greedy & Epsilon-Greedy Agents
import random
class Action(object):
"""
Implementation of an action.
optimistic: determines whether or not Q is initialized optimistically
"""
def __init__(self, optimistic=False):
# Current value estimation for this action
@nums11
nums11 / test_agents.py
Created February 17, 2023 00:50
Test Multi-Armed Bandit Agents
import gym
import gym_bandits
from Agents import GreedyAgent, EpsilonGreedyAgent
import matplotlib.pyplot as plt
# env = gym.make("BanditTwoArmedDeterministicFixed-v0")
env = gym.make("BanditTenArmedUniformDistributedReward-v0")
greedy_agent = GreedyAgent(env)
e_greedy_agent = EpsilonGreedyAgent(env, 0.1)
@nums11
nums11 / NonStationaryBanditEnv.py
Created February 18, 2023 01:50
NonStationaryBanditEnv
import gym
from gym import Env
from gym.spaces import Discrete
import random
class NonStationaryBanditEnv(Env):
"""
Non-Stationary 5-arm Bandit Environment
At any given time, the best action returns a reward of 1
@nums11
nums11 / Agents.py
Created February 18, 2023 02:25
Agents With Constant Alpha
import random
class Action(object):
"""
Implementation of an action.
optimistic: determines whether or not Q is initialized optimistically
"""
def __init__(self, optimistic=False):
# Current value estimation for this action
@nums11
nums11 / Agents.py
Created February 18, 2023 22:54
Agents with UCB
import random
import numpy as np
class Action(object):
"""
Implementation of an action.
optimistic: determines whether or not Q is initialized optimistically
"""
def __init__(self, optimistic=False):
@nums11
nums11 / test_agents.py
Created February 18, 2023 22:58
Test UCB Agent
from Agents import UCBAgent
import matplotlib.pyplot as plt
from NonStationaryBanditEnv import NonStationaryBanditEnv
import numpy as np
env = NonStationaryBanditEnv()
def testUCBAgent(c):
ucb_agent = UCBAgent(env, 0.1, c)
num_episodes = 10000
@nums11
nums11 / Agents.py
Created February 22, 2023 07:57
Agents with GradientBandit
import random
import numpy as np
class Action(object):
"""
Implementation of an action.
optimistic: determines whether or not Q is initialized optimistically
"""
def __init__(self, optimistic=False):
@nums11
nums11 / test_g_bandit_agent.py
Created February 22, 2023 08:04
Test Gradient Bandit Agent
from Agents import GradientBanditAgent
import matplotlib.pyplot as plt
from NonStationaryBanditEnv import NonStationaryBanditEnv
import numpy as np
env = NonStationaryBanditEnv()
def testGBanditAgent(alpha):
g_bandit_agent = GradientBanditAgent(env, alpha)
num_episodes = 10000
@nums11
nums11 / GridWorldEnv.py
Created March 4, 2023 02:23
Grid World
import gym
from gym import Env
from gym.spaces import Discrete, Box
import random
import numpy as np
import pprint
class GridWorldEnv(Env):
"""
Simple 4x4 GridWorld Env from figure 4.1 in the RL textbook
@nums11
nums11 / Agents.py
Created March 4, 2023 02:28
Policy Iteration
import numpy as np
import pprint
class State(object):
"""
Represents a state or a point in the grid.
coord: coordinate in grid world
"""
def __init__(self, coord, is_terminal):