Created
December 4, 2016 20:24
-
-
Save hmate9/49758ee1117ae55616f45d72186834a5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train a keras-rl DQN agent on the 'catch-v0' gym environment, save the
# learned weights, then run the trained agent for a batch of test episodes.
import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy, LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
# Never referenced by name below; presumably imported for its side effect of
# registering 'catch-v0' with gym -- confirm before removing.
import gym_catch

ENV_NAME = 'catch-v0'
SEED = 123

# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
# Seed both numpy and the environment with the same value.
np.random.seed(SEED)
env.seed(SEED)
nb_actions = env.action_space.n
observation_shape = env.observation_space.shape
print("Actions: " + str(nb_actions))
print("ObSpace: " + str(observation_shape))

# Next, we build a very simple model: two hidden layers of 100 ReLU units and
# a linear output layer with one unit per action.
model = Sequential()
# The leading 1 in the input shape presumably corresponds to window_length=1
# of the memory configured below -- keep the two in sync.
model.add(Flatten(input_shape=(1,) + observation_shape))
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line).
model.summary()

# Finally, we configure and compile our agent. You can use every built-in
# Keras optimizer and even the metrics!
memory = SequentialMemory(limit=500, window_length=1)
policy = BoltzmannQPolicy(tau=0.05)
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=10,
    target_model_update=10000,
    policy=policy,
)
# NOTE(review): lr=0.2 is far above the values commonly used with Adam;
# left unchanged here, but confirm it is intentional.
dqn.compile(Adam(lr=0.2), metrics=['mae'])

# Okay, now it's time to learn something! Visualization is turned off during
# training because rendering slows training down quite a lot. You can always
# safely abort the training prematurely using Ctrl + C.
dqn.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training is done, we save the final weights.
weights_path = 'dqn_{}_weights.h5f'.format(ENV_NAME)
dqn.save_weights(weights_path, overwrite=True)
# To evaluate previously saved weights instead of retraining, skip the
# fit/save_weights calls above and call dqn.load_weights(weights_path).

# Finally, evaluate our algorithm for 500 episodes, rendering each one.
dqn.test(env, nb_episodes=500, visualize=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@codiphy hey I just randomly stumbled on this comment. The solution was posted in keras-rl/keras-rl#51
tldr: In my environment's implementation of the `step` function, I returned a reference to the current state rather than a deep copy. This, of course, messed things up completely.