This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random | |
| class Action(object): | |
| """ | |
| Implementation of an action. | |
| optimistic: determines whether or not Q is initialized optimistically | |
| """ | |
| def __init__(self, optimistic=False): | |
| # Current value estimation for this action |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import gym | |
| import gym_bandits | |
| from Agents import GreedyAgent, EpsilonGreedyAgent | |
| import matplotlib.pyplot as plt | |
| # env = gym.make("BanditTwoArmedDeterministicFixed-v0") | |
| env = gym.make("BanditTenArmedUniformDistributedReward-v0") | |
| greedy_agent = GreedyAgent(env) | |
| e_greedy_agent = EpsilonGreedyAgent(env, 0.1) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import gym | |
| from gym import Env | |
| from gym.spaces import Discrete | |
| import random | |
| class NonStationaryBanditEnv(Env): | |
| """ | |
| Non-Stationary 5-arm Bandit Environment | |
| At any given time, the best action returns a reward of 1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random | |
| class Action(object): | |
| """ | |
| Implementation of an action. | |
| optimistic: determines whether or not Q is initialized optimistically | |
| """ | |
| def __init__(self, optimistic=False): | |
| # Current value estimation for this action |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random | |
| import numpy as np | |
| class Action(object): | |
| """ | |
| Implementation of an action. | |
| optimistic: determines whether or not Q is initialized optimistically | |
| """ | |
| def __init__(self, optimistic=False): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from Agents import UCBAgent | |
| import matplotlib.pyplot as plt | |
| from NonStationaryBanditEnv import NonStationaryBanditEnv | |
| import numpy as np | |
| env = NonStationaryBanditEnv() | |
| def testUCBAgent(c): | |
| ucb_agent = UCBAgent(env, 0.1, c) | |
| num_episodes = 10000 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random | |
| import numpy as np | |
| class Action(object): | |
| """ | |
| Implementation of an action. | |
| optimistic: determines whether or not Q is initialized optimistically | |
| """ | |
| def __init__(self, optimistic=False): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from Agents import GradientBanditAgent | |
| import matplotlib.pyplot as plt | |
| from NonStationaryBanditEnv import NonStationaryBanditEnv | |
| import numpy as np | |
| env = NonStationaryBanditEnv() | |
| def testGBanditAgent(alpha): | |
| g_bandit_agent = GradientBanditAgent(env, alpha) | |
| num_episodes = 10000 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import gym | |
| from gym import Env | |
| from gym.spaces import Discrete, Box | |
| import random | |
| import numpy as np | |
| import pprint | |
| class GridWorldEnv(Env): | |
| """ | |
| Simple 4x4 GridWorld Env from figure 4.1 in the RL textbook |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import pprint | |
| class State(object): | |
| """ | |
| Represents a state or a point in the grid. | |
| coord: coordinate in grid world | |
| """ | |
| def __init__(self, coord, is_terminal): |
OlderNewer