import os
import importlib
import logging
import numpy as np
from pastalog import Log
from sacred import Experiment
from sacred.observers import MongoObserver
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
from PLE_env import MyEnv as PLE_env
from deeprl.q_networks.q_net_theano import MyQNetwork
from deeprl.agent_ale import ALEAgent as PLEAgent
#could * import but better to know what you're getting
from deeprl.experiment.base_controllers import VerboseController, LearningRateController, DiscountFactorController, EpsilonController, InterleavedTestEpochController, Controller
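
# Controller that flips PLE's display on for one epoch out of every `periodicity`
# epochs, so training can be watched occasionally without rendering all the time.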
class PLEDisplayToggleController(Controller):
    def __init__(self, periodicity=2):
        super(self.__class__, self).__init__()
        self._epochCount = 0
        self._periodicity = periodicity

    def OnStart(self, agent):
        if not self._active:
            return
        self._epochCount = 0

    def OnEpochEnd(self, agent):
        if not self._active:
            return
        self._epochCount += 1
        if (self._epochCount % self._periodicity) == 0:
            # prob not a good idea to reach into the env like this?
            agent._environment._ple.display_screen = True
        else:
            agent._environment._ple.display_screen = False
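
# Controller that posts the learning rate, discount factor, epsilon, and running
# Bellman-residual / episode-value averages to a pastalog server for live plotting.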
class PastaLogController(Controller):
    def __init__(self, model_name=None, host="https://localhost:9000", loss_periodicity=100):
        super(self.__class__, self).__init__()
        self._loss_periodicity = loss_periodicity
        self._action_count = 0
        self._log = Log(host, model_name)

    def OnStart(self, agent):
        if not self._active:
            return
        self._action_count = 0
        self._log.post("learning_rate", value=agent.learningRate(), step=self._action_count)
        self._log.post("discount", value=agent.discountFactor(), step=self._action_count)
        self._log.post("epsilon", value=agent.epsilon(), step=self._action_count)

    def OnActionTaken(self, agent):
        if not self._active:
            return
        if self._action_count % self._loss_periodicity == 0:
            bell_res = float(agent.avgBellmanResidual())
            ep_v = float(agent.avgEpisodeVValue())
            self._log.post("avg_bellman_residual", value=bell_res, step=self._action_count)
            self._log.post("avg_episode_value", value=ep_v, step=self._action_count)
        self._action_count += 1

    def OnEpochEnd(self, agent):
        if not self._active:
            return
        self._log.post("learning_rate", value=agent.learningRate(), step=self._action_count)
        self._log.post("discount", value=agent.discountFactor(), step=self._action_count)
        self._log.post("epsilon", value=agent.epsilon(), step=self._action_count)
ex = Experiment("dqn_trn")


@ex.config
def config():
    # game specific
    game_cfg = {
        "width": 64,
        "height": 64,
        "name": "WaterWorld",
        "other": {
            "num_creeps": 8
        },
        "allowed": ["WaterWorld", "PuckWorld", "Snake", "RaycastMaze"]
    }
    assert game_cfg["name"] in game_cfg["allowed"], ("Game must be in game_cfg['allowed']:", game_cfg["allowed"])

    # ple specific
    ple_cfg = {
        "fps": 30,
        "frame_skip": 3,
        "force_fps": True,
        "display_screen": False,
        "add_noop_action": True
    }

    # deeprl specific
    deeprl_cfg = {
        "steps_per_epoch": 90000,
        "epochs": 30,
        "steps_per_test": 30000,
        "discount_inc_periodicity": 1,
        "learning_decay_periodicity": 1
    }
    dqn_agent_cfg = {
        "update_rule": "adam",
        "batch_accumulator": "sum",
        "learning_rate_init": 0.01,
        "learning_rate_final": 1e-4,
        "discount_init": 0.1,
        "discount_max_at": 15,
        "discount_max": 0.99,
        "clip_delta": 1.0,
        "epsilon_init": 1.0,
        "epsilon_min": 0.1,
        "epsilon_decay": deeprl_cfg["steps_per_epoch"] * 5,
        "update_frequency": 1,
        "replay_memory_size": 1000000,
        "batch_size": 32,
        "freeze_interval": 10000,
        "deterministic": True
    }

    # live in the moment @ the start and go for longer preds later
    dqn_agent_cfg["discount_inc"] = (dqn_agent_cfg["discount_max"] - dqn_agent_cfg["discount_init"]) / dqn_agent_cfg["discount_max_at"]
    dqn_agent_cfg["learning_rate_decay"] = np.exp(
        np.log(dqn_agent_cfg["learning_rate_final"] / dqn_agent_cfg["learning_rate_init"]) / (deeprl_cfg["epochs"] / deeprl_cfg["learning_decay_periodicity"])
    )
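    # With the defaults above: discount_inc = (0.99 - 0.1) / 15 ≈ 0.059 per epoch,
    # and learning_rate_decay = exp(ln(1e-4 / 0.01) / 30) ≈ 0.858, i.e. the learning
    # rate is multiplied by roughly 0.858 each decay period.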


def init_game(game_cfg):
    game = getattr(importlib.import_module("ple.games.%s" % game_cfg["name"].lower()), game_cfg["name"])
    if "num_creeps" in game_cfg["other"] and game_cfg["name"] == "WaterWorld":
        game = game(
            width=game_cfg["width"],
            height=game_cfg["height"],
            num_creeps=game_cfg["other"]["num_creeps"])
    else:
        game = game(width=game_cfg["width"], height=game_cfg["height"])
    return game


@ex.automain
def run(game_cfg, ple_cfg, deeprl_cfg, dqn_agent_cfg, _rnd):
    rng = _rnd
    game = init_game(game_cfg)
    env = PLE_env(rng, game=game, **ple_cfg)

    q_network = MyQNetwork(
        env,
        0.9,
        0.0001,
        0.0,
        dqn_agent_cfg["clip_delta"],
        dqn_agent_cfg["freeze_interval"],
        dqn_agent_cfg["batch_size"],
        "General_DQN_0",
        "deepmind_rmsprop",
        dqn_agent_cfg["batch_accumulator"],
        rng
    )
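    # Note: several MyQNetwork hyperparameters above (0.9, 0.0001, 0.0 and the
    # "deepmind_rmsprop" update rule) are passed positionally and hard-coded here
    # rather than taken from dqn_agent_cfg.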
    agent = PLEAgent(
        env,
        q_network,
        dqn_agent_cfg["replay_memory_size"],
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        dqn_agent_cfg["batch_size"],
        rng
    )
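    # max(...) above picks the largest leading dimension of the env's inputs;
    # with inputDimensions() == [(4, 48, 48)] that is 4 stacked frames.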
    # prints summary of performance before each epoch
    agent.attach(VerboseController(
        evaluateOn="epoch",
        periodicity=1
    ))

    # decays the learning rate over time
    agent.attach(LearningRateController(
        initialLearningRate=dqn_agent_cfg["learning_rate_init"],
        learningRateDecay=dqn_agent_cfg["learning_rate_decay"],
        periodicity=deeprl_cfg["learning_decay_periodicity"]
    ))

    # increases the discount factor over time
    agent.attach(DiscountFactorController(
        initialDiscountFactor=dqn_agent_cfg["discount_init"],
        discountFactorGrowth=dqn_agent_cfg["discount_inc"],
        discountFactorMax=dqn_agent_cfg["discount_max"],
        periodicity=deeprl_cfg["discount_inc_periodicity"]
    ))

    # anneals epsilon on every action taken
    agent.attach(EpsilonController(
        initialE=dqn_agent_cfg["epsilon_init"],
        eDecays=dqn_agent_cfg["epsilon_decay"],
        eMin=dqn_agent_cfg["epsilon_min"],
        evaluateOn="action",
        periodicity=1,
        resetEvery="none"
    ))

    # pretty live graphs
    agent.attach(PastaLogController(
        model_name="DQN_%s" % game_cfg["name"],
        host="http://localhost:8000",
        loss_periodicity=500
    ))

    agent.attach(InterleavedTestEpochController(
        id=PLE_env.VALIDATION_MODE,
        epochLength=deeprl_cfg["steps_per_test"],
        controllersToDisable=[0, 1, 2, 3, 4],
        periodicity=2,
        showScore=True,
        summarizeEvery=1
    ))

    # just toggles the display on and off so we can watch
    agent.attach(PLEDisplayToggleController(
        periodicity=2
    ))

    agent.run(deeprl_cfg["epochs"], deeprl_cfg["steps_per_epoch"])
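

# ---------------------------------------------------------------------------
# PLE_env.py -- the environment wrapper imported above as `PLE_env`
# ---------------------------------------------------------------------------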
""" Interface with the PLE environment
Authors: Vincent Francois-Lavet, David Taralla
Modified by: Norman Tasfi
"""
import numpy as np
import cv2
from ple import PLE
from deeprl.base_classes import Environment
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
import matplotlib.pyplot as plt
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self, rng, game=None, frame_skip=4,
                 display_screen=True, add_noop_action=True, force_fps=True, fps=30):
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0
        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        if game is None:
            raise ValueError("Game must be provided")

        # always default ple to 1, let the env skip.
        self._ple = PLE(game, frame_skip=1, display_screen=display_screen,
                        add_noop_action=add_noop_action, force_fps=force_fps, fps=fps)
        self._ple.init()

        w, h = self._ple.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((48, 48), dtype=np.uint8)
        self._actions = self._ple.getActionSet()
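
    # Observations are the full game screen in grayscale, downsampled to 48x48
    # with nearest-neighbour interpolation; the agent consumes a stack of 4 such
    # frames (see inputDimensions()).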
    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        self._ple.reset_game()
        for _ in range(self._randomState.randint(15)):
            self._ple.act(self._ple.NOOP)
        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48), self._reducedScreen, interpolation=cv2.INTER_NEAREST)

        return [4 * [48 * [48 * [0]]]]
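
    # act() repeats the chosen action for `frame_skip` frames, accumulates the
    # raw reward, and returns only its sign (reward clipping).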
    def act(self, action):
        action = self._actions[action]

        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ple.act(action)
            if self.inTerminalState():
                break

        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48), self._reducedScreen, interpolation=cv2.INTER_NEAREST)

        self._modeScore += reward
        return np.sign(reward)
    def summarizePerformance(self, test_data_set):
        if not self.inTerminalState():
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(
            self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))

    def inputDimensions(self):
        return [(4, 48, 48)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ple.game_over()


if __name__ == "__main__":
    pass