@alexaorrico
Last active March 18, 2021 02:29
keras-rl2 issue: DDPG + MultiInputProcessor + HandReach-v0

#!/usr/bin/env python3
import gym
import numpy as np
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.processors import MultiInputProcessor
from rl.random import OrnsteinUhlenbeckProcess
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import concatenate, Dense, Flatten
from tensorflow.keras.optimizers import Adam
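
# Both networks take the three components of the goal-env observation
# ('observation', 'achieved_goal', 'desired_goal') as separate Keras inputs;
# the critic additionally takes the action tensor.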
def create_actor(inputs, A):
    """Actor: maps the flattened observation components to A actions in [-1, 1]."""
    I_observation, I_achieved, I_desired = inputs
    x = concatenate([I_observation, I_achieved, I_desired], axis=-1)
    x = Flatten()(x)  # drop the window axis added by window_length=1
    x = Dense(400, activation='relu')(x)
    x = Dense(200, activation='relu')(x)
    y = Dense(A, activation='tanh')(x)  # tanh keeps actions inside the Box bounds
    actor = Model(inputs=inputs, outputs=y)
    return actor

def create_critic(inputs):
    """Critic: Q(s, a); the action joins the network after the first hidden layer."""
    I_observation, I_achieved, I_desired, I_action = inputs
    x = concatenate([I_observation, I_achieved, I_desired], axis=-1)
    x = Flatten()(x)
    x = Dense(400, activation='relu')(x)
    x = concatenate([x, I_action])  # inject the action mid-network, DDPG-style
    x = Dense(200, activation='relu')(x)
    y = Dense(1, activation='linear')(x)
    critic = Model(inputs=inputs, outputs=y)
    return critic

ENV_NAME = 'HandReach-v0'
env = gym.make(ENV_NAME)
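# Monitor (old gym API) records episode stats/videos under ./HandReach-v0;
# force=True clears results from any previous run.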
env = gym.wrappers.Monitor(env, './{}'.format(ENV_NAME), force=True)
observation = env.reset()
A = env.action_space.shape[0]
S = observation['observation'].shape[0]
G = observation['desired_goal'].shape[0]
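
# keras-rl stores states as windows of observations; with window_length=1 each
# component arrives shaped (1, dim), hence the (1, S)/(1, G) inputs and the
# Flatten() calls in the models.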
I_observation = Input(shape=(1, S), name='observation')
I_achieved = Input(shape=(1, G), name='achieved_goal')
I_desired = Input(shape=(1, G), name='desired_goal')
I_action = Input(shape=(A,))
actor = create_actor([I_observation, I_achieved, I_desired], A)
critic = create_critic([I_observation, I_achieved, I_desired, I_action])
memory = SequentialMemory(limit=100000, window_length=1)
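# Ornstein-Uhlenbeck noise for exploration, one dimension per action (the
# standard choice for DDPG).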
random_process = OrnsteinUhlenbeckProcess(size=A, theta=0.15, mu=0., sigma=0.1)
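# MultiInputProcessor(3) splits each stored observation into the three model
# inputs above, batching them positionally; it expects each observation to be
# an iterable of exactly three components. HandReach-v0 returns a dict, which
# is the combination this gist exercises.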
processor = MultiInputProcessor(3)
agent = DDPGAgent(nb_actions=A, actor=actor, critic=critic, critic_action_input=I_action,
                  memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  random_process=random_process, gamma=0.99, target_model_update=1e-3,
                  processor=processor)
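
# DDPGAgent.compile takes [actor_optimizer, critic_optimizer]; the 1e-4/1e-3
# learning rates match the original DDPG paper.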
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])
agent.fit(env, nb_steps=1000000, visualize=False, verbose=1)
agent.save_weights('ddpg+her_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
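
If MultiInputProcessor turns out not to handle the dict observations that goal
envs like HandReach-v0 return (it batches the components of each observation
positionally), one possible workaround is to flatten the dict into an ordered
tuple before keras-rl2 ever stores it. A minimal sketch, untested against this
repro; DictToTupleProcessor and KEYS are names invented here:

from rl.processors import MultiInputProcessor

class DictToTupleProcessor(MultiInputProcessor):
    # Key order must match the order of the Input layers fed to actor/critic.
    KEYS = ('observation', 'achieved_goal', 'desired_goal')

    def process_observation(self, observation):
        # Goal envs return a dict; hand back a fixed-order tuple so the
        # positional batching in MultiInputProcessor lines up with the inputs.
        return tuple(observation[k] for k in self.KEYS)

# then construct the agent with: processor = DictToTupleProcessor(3)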