This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # An incremental-update version of off-policy MC Control | |
| # using weighted importance sampling | |
| def OffPolicyEveryVistMCControl(gamma, num_episodes): | |
| # Target policy is a deterministic policy with initial actions chosen | |
| # randomly for each state | |
| pi = Policy('d') | |
| # Behavior policy is a nondeterministic policy that selects all actions | |
| # with equal probability | |
| b = Policy('nd') | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | def OnPolicyFirstVistMCControl(gamma, num_episodes, epsilon=0.1): | |
| # Initialize an epsilon-greedy policy with initial actions chosen | |
| # randomly for each state | |
| policy = Policy('eg', epsilon) | |
| # Initialize Q (action-value function) and returns for the 4x4 grid world | |
| Q = {} | |
| returns = {} | |
| for x in range(4): | |
| for y in range(4): | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Implementation of Monte Carlo Control with Exploring Starts | |
| def MCES(gamma, num_episodes): | |
| # Initialize deterministic policy with initial actions chosen | |
| # randomly for each state | |
| policy = Policy('d') | |
| # Initialize Q (action-value function) and returns for the 4x4 grid world | |
| Q = {} | |
| returns = {} | |
| for x in range(4): | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Implementation of first-visit MC prediction | |
| def firstVisitMCPred(gamma, num_episodes): | |
| # Initialize a nondeterministic policy that selects an action | |
| # at random, with all actions having equal probability | |
| policy = Policy('nd') | |
| # Initialize V (state-value function) and returns for the 4x4 grid world | |
| V = {} | |
| returns = {} | |
| for x in range(4): | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import numpy as np | |
| import pprint | |
| class State(object): | |
| """ | |
| Represents a state or a point in the grid. | |
| coord: coordinate in grid world | |
| """ | |
| def __init__(self, coord, is_terminal): | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import gym | |
| from GridWorldEnv import GridWorldEnv | |
| from Agents import PolicyIterationAgent | |
| env = GridWorldEnv() | |
| env.reset() | |
| env.render() | |
| pi_agent = PolicyIterationAgent(0.9) | |
| pi_agent.policyIterate() | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import numpy as np | |
| import pprint | |
| class State(object): | |
| """ | |
| Represents a state or a point in the grid. | |
| coord: coordinate in grid world | |
| """ | |
| def __init__(self, coord, is_terminal): | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import gym | |
| from gym import Env | |
| from gym.spaces import Discrete, Box | |
| import random | |
| import numpy as np | |
| import pprint | |
| class GridWorldEnv(Env): | |
| """ | |
| Simple 4x4 GridWorld Env from figure 4.1 in the RL textbook | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | from Agents import GradientBanditAgent | |
| import matplotlib.pyplot as plt | |
| from NonStationaryBanditEnv import NonStationaryBanditEnv | |
| import numpy as np | |
| env = NonStationaryBanditEnv() | |
| def testGBanditAgent(alpha): | |
| g_bandit_agent = GradientBanditAgent(env, alpha) | |
| num_episodes = 10000 | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import random | |
| import numpy as np | |
| class Action(object): | |
| """ | |
| Implementation of an action. | |
| optimistic: determines whether or not Q is initialized optimistically | |
| """ | |
| def __init__(self, optimistic=False): | 
Newer | Older