Skip to content

Instantly share code, notes, and snippets.

@njp947
Last active February 25, 2017 13:15
Show Gist options
  • Save njp947/e75c17751f1da59e770b5aa4486cd442 to your computer and use it in GitHub Desktop.
UH-CMA-ES 0.1
# CLI parsing, numerics, the policy network library, the RL environment
# suite, and the CMA-ES optimizer.
import argparse
import numpy
import keras
import gym
import cma
# Single positional CLI argument: the Gym environment id (e.g. "CartPole-v0").
parser = argparse.ArgumentParser()
parser.add_argument("environment")
args = parser.parse_args()
environment = gym.make(args.environment)
# Small feed-forward policy: two tanh hidden layers, then one linear
# output unit per discrete action (raw scores; argmax picks the action).
model = keras.models.Sequential()
model.add(keras.layers.Dense(10, activation="tanh",
                             input_shape=environment.observation_space.shape))
model.add(keras.layers.Dense(5, activation="tanh"))
model.add(keras.layers.Dense(environment.action_space.n))
# Per-array weight shapes, needed to unflatten a CMA-ES solution vector.
shapes = [weight.shape for weight in model.get_weights()]
def get_solution(weights):
    """Flatten a list of weight arrays into a single 1-D parameter vector."""
    flat_parts = [numpy.ravel(weight) for weight in weights]
    return numpy.concatenate(flat_parts)
def set_weights(solution):
    """Load a flat CMA-ES solution vector into the model's weights.

    Walks the per-array `shapes`, carving consecutive slices out of
    `solution` and reshaping each slice to its array's shape.

    Bug fixed: the original sliced `solution[1:1+numpy.prod(shape)]` for
    EVERY shape — the start index was a constant 1 (skipping element 0)
    and never advanced, so all weight arrays were filled from the same
    overlapping prefix of the vector. The offset now accumulates so each
    array gets its own contiguous segment, inverting get_solution().
    """
    weights = []
    offset = 0
    for shape in shapes:
        size = int(numpy.prod(shape))
        weights.append(solution[offset:offset + size].reshape(shape))
        offset += size
    model.set_weights(weights)
def get_action(observation):
    """Greedy policy: return the index of the largest output score."""
    scores = model.predict_on_batch(observation)
    return numpy.argmax(scores)
# predict_on_batch expects a batch axis: one observation becomes (1, *obs_shape).
shape = (1,) + environment.observation_space.shape
def get_reward():
    """Run one full episode with the current model weights.

    Returns the undiscounted sum of rewards for the episode.
    """
    observation = environment.reset()
    total_reward = 0
    done = False
    while not done:
        action = get_action(observation.reshape(shape))
        observation, reward, done, _info = environment.step(action)
        total_reward += reward
    return total_reward
def f(x):
    """CMA-ES objective: negated episode return (fmin minimizes)."""
    set_weights(x)
    return -get_reward()
# Initial search point: the freshly-initialized model weights, flattened.
x0 = get_solution(model.get_weights())
# Disable CMA-ES's convergence tolerances so the run stops only when the
# evaluation budget (maxfevals) is exhausted.
options = {
    "maxfevals": 1e4,
    "tolx": 0,
    "tolfun": 0,
    "tolfunhist": 0,
}
# Re-evaluation-based noise handling for the stochastic episode returns.
noise_handler = cma.NoiseHandler(len(x0))
# Record episode statistics (no video) into "gym" for the upload below.
environment.monitor.start("gym", video_callable=False)
cma.fmin(f, x0, 0.1, options, noise_handler=noise_handler)
environment.monitor.close()
gym.upload("gym", algorithm_id="alg_RZqXTrW7QXmXmzc0y9rAmw")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment