Gist alexaorrico/35421671deee68db1dc7607292f49062 — save it to your computer and use it in GitHub Desktop.
keras-rl2 issue: DDPG + MultiInputProcessor + HandReach-v0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than it appears below; to review it, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters.
#!/usr/bin/env python3
"""Train a DDPG agent (keras-rl2) on gym's goal-based HandReach-v0 env."""
import gym
import numpy as np
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, Flatten, concatenate
from tensorflow.keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.processors import MultiInputProcessor
from rl.random import OrnsteinUhlenbeckProcess
def create_actor(inputs, A, hidden=(400, 200)):
    """Build the DDPG actor (policy) network.

    Parameters
    ----------
    inputs : list of Keras Input tensors
        [observation, achieved_goal, desired_goal]; each is shaped
        (1, dim) to match the memory's window_length of 1.
    A : int
        Number of action dimensions; size of the output layer.
    hidden : tuple of int, optional
        Units of the fully-connected hidden layers (default (400, 200),
        matching the original hard-coded architecture).

    Returns
    -------
    Model
        Keras model mapping the three goal-env inputs to a tanh-bounded
        action vector.
    """
    I_observation, I_achieved, I_desired = inputs
    # Fuse the three components of the dict observation into one vector.
    x = concatenate([I_observation, I_achieved, I_desired], axis=-1)
    x = Flatten()(x)  # drop the window dimension added by SequentialMemory
    for units in hidden:
        x = Dense(units, activation='relu')(x)
    # tanh keeps each action component in [-1, 1].
    y = Dense(A, activation='tanh')(x)
    return Model(inputs=inputs, outputs=y)
def create_critic(inputs, hidden=(400, 200)):
    """Build the DDPG critic (Q-value) network.

    Parameters
    ----------
    inputs : list of Keras Input tensors
        [observation, achieved_goal, desired_goal, action]; the state
        inputs are shaped (1, dim), the action input (A,).
    hidden : tuple of int, optional
        Units of the two fully-connected hidden layers (default
        (400, 200), matching the original hard-coded architecture).

    Returns
    -------
    Model
        Keras model mapping (state inputs, action) to a scalar Q-value.
    """
    I_observation, I_achieved, I_desired, I_action = inputs
    # Fuse the state components only; the action is injected later.
    x = concatenate([I_observation, I_achieved, I_desired], axis=-1)
    x = Flatten()(x)  # drop the window dimension added by SequentialMemory
    x = Dense(hidden[0], activation='relu')(x)
    # Late fusion of the action after the first hidden layer, as in the
    # original DDPG architecture.
    x = concatenate([x, I_action])
    x = Dense(hidden[1], activation='relu')(x)
    y = Dense(1, activation='linear')(x)
    return Model(inputs=inputs, outputs=y)
# --- Environment -------------------------------------------------------
ENV_NAME = 'HandReach-v0'
env = gym.wrappers.Monitor(gym.make(ENV_NAME), './{}'.format(ENV_NAME), force=True)

# Goal-based envs return a dict observation with 'observation',
# 'achieved_goal' and 'desired_goal' entries; read their sizes to shape
# the network inputs.
first_obs = env.reset()
nb_actions = env.action_space.shape[0]
obs_dim = first_obs['observation'].shape[0]
goal_dim = first_obs['desired_goal'].shape[0]

# --- Networks ----------------------------------------------------------
# Leading 1 in each shape matches SequentialMemory's window_length=1.
obs_input = Input(shape=(1, obs_dim), name='observation')
ach_input = Input(shape=(1, goal_dim), name='achieved_goal')
des_input = Input(shape=(1, goal_dim), name='desired_goal')
act_input = Input(shape=(nb_actions,))
actor = create_actor([obs_input, ach_input, des_input], nb_actions)
critic = create_critic([obs_input, ach_input, des_input, act_input])

# --- Agent -------------------------------------------------------------
memory = SequentialMemory(limit=100000, window_length=1)
# Ornstein-Uhlenbeck noise for temporally-correlated exploration.
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.15, mu=0., sigma=0.1)
# Three dict entries per observation -> MultiInputProcessor(3).
processor = MultiInputProcessor(3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=act_input, memory=memory,
                  nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  random_process=random_process, gamma=0.99,
                  target_model_update=1e-3, processor=processor)
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

# --- Train, save, evaluate --------------------------------------------
agent.fit(env, nb_steps=1000000, visualize=False, verbose=1)
agent.save_weights('ddpg+her_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.