Skip to content

Instantly share code, notes, and snippets.

@nums11
nums11 / MCAlgos.py
Created April 12, 2023 22:20
Off-policy MC Control using Weighted Importance Sampling
# An incremental updates version of off-policy MC Control
# using weighted importance sampling
def OffPolicyEveryVistMCControl(gamma, num_episodes):
# Target policy is a deterministic policy with initial actions chosen
# randomly for each state
pi = Policy('d')
# Behavior policy is a nondeterministic policy that selects all actions
# with equal probability
b = Policy('nd')
@nums11
nums11 / MCAlgos.py
Created April 12, 2023 01:37
On-Policy First-Visit MC Control for Epsilon-Greedy Policies
def OnPolicyFirstVistMCControl(gamma, num_episodes, epsilon=0.1):
# Initialize e-greedy policy with initial actions chosen
# randomly for each state
policy = Policy('eg', epsilon)
# Initialize Q (action-value function) and returns for the 4x4 grid world
Q = {}
returns = {}
for x in range(4):
for y in range(4):
@nums11
nums11 / MCAlgos.py
Created April 12, 2023 00:48
Monte Carlo Control With Exploring Starts
# Implementation of Monte Carlo Control with Exploring Starts
def MCES(gamma, num_episodes):
# Initialize deterministic policy with initial actions chosen
# randomly for each state
policy = Policy('d')
# Initialize Q (action-value function) and returns for the 4x4 grid world
Q = {}
returns = {}
for x in range(4):
@nums11
nums11 / MCAlgos.py
Created April 12, 2023 00:06
First Visit MC Prediction
# Implementation of first visit MC prediction
def firstVisitMCPred(gamma, num_episodes):
# Initialize nondeterministic policy that selects an action
# randomly, with all actions having equal probability
policy = Policy('nd')
# Initialize V (state-value function) and returns for the 4x4 grid world
V = {}
returns = {}
for x in range(4):
@nums11
nums11 / Agents.py
Created March 6, 2023 23:47
Value Iteration Agent
import numpy as np
import pprint
class State(object):
"""
Represents a state or a point in the grid.
coord: coordinate in grid world
"""
def __init__(self, coord, is_terminal):
@nums11
nums11 / test.py
Created March 4, 2023 02:53
Test Policy Iteration Agent on Grid World
import gym
from GridWorldEnv import GridWorldEnv
from Agents import PolicyIterationAgent
env = GridWorldEnv()
env.reset()
env.render()
pi_agent = PolicyIterationAgent(0.9)
pi_agent.policyIterate()
@nums11
nums11 / Agents.py
Created March 4, 2023 02:28
Policy Iteration
import numpy as np
import pprint
class State(object):
"""
Represents a state or a point in the grid.
coord: coordinate in grid world
"""
def __init__(self, coord, is_terminal):
@nums11
nums11 / GridWorldEnv.py
Created March 4, 2023 02:23
Grid World
import gym
from gym import Env
from gym.spaces import Discrete, Box
import random
import numpy as np
import pprint
class GridWorldEnv(Env):
"""
Simple 4x4 GridWorld Env from figure 4.1 in the RL textbook
@nums11
nums11 / test_g_bandit_agent.py
Created February 22, 2023 08:04
Test Gradient Bandit Agent
from Agents import GradientBanditAgent
import matplotlib.pyplot as plt
from NonStationaryBanditEnv import NonStationaryBanditEnv
import numpy as np
env = NonStationaryBanditEnv()
def testGBanditAgent(alpha):
g_bandit_agent = GradientBanditAgent(env, alpha)
num_episodes = 10000
@nums11
nums11 / Agents.py
Created February 22, 2023 07:57
Agents with GradientBandit
import random
import numpy as np
class Action(object):
"""
Implementation of an action.
optimistic: determines whether or not Q is initialized optimistically
"""
def __init__(self, optimistic=False):