@njp947
Created January 7, 2017 10:12
cma-v3
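# Train a Keras policy network on an OpenAI Gym environment with CMA-ES
# (the cma package), maximizing total episode reward.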
import argparse
import numpy
import keras
import gym
import cma
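# Take the Gym environment id (e.g. "Pong-v0") from the command line.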
parser = argparse.ArgumentParser()
parser.add_argument("environment")
args = parser.parse_args()
environment = gym.make(args.environment)
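# DQN-style network (cf. Mnih et al., 2013), written against the Keras 1.x
# API: two conv layers, a 256-unit dense layer, one output per action.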
model = keras.models.Sequential()
model.add(keras.layers.convolutional.Convolution2D(16, 8, 8, subsample=(4,4), activation="relu", input_shape=environment.observation_space.shape))
model.add(keras.layers.convolutional.Convolution2D(32, 4, 4, subsample=(2,2), activation="relu"))
model.add(keras.layers.core.Flatten())
model.add(keras.layers.core.Dense(256, activation="relu"))
model.add(keras.layers.core.Dense(environment.action_space.n))
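# Per-layer weight shapes, used to unpack flat CMA-ES solution vectors.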
shapes = [weight.shape for weight in model.get_weights()]
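# Flatten all weight tensors into a single parameter vector for CMA-ES.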
def get_solution(weights):
    return numpy.concatenate([weight.reshape(-1) for weight in weights])
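# Unpack a flat solution vector back into per-layer weight tensors.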
def set_weights(solution):
    weights = []
    offset = 0
    for shape in shapes:
        size = numpy.prod(shape)
        weights.append(solution[offset:offset + size].reshape(shape))
        offset += size
    model.set_weights(weights)
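# Greedy policy: pick the action with the largest network output.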
def get_action(observation):
    return numpy.argmax(model.predict_on_batch(observation))
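# Network input shape: a leading batch dimension of one for single observations.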
shape = (1,) + environment.observation_space.shape
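# Roll out one episode with the current weights and return its total reward.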
def get_reward():
    observation = environment.reset()
    total_reward = 0
    done = False
    while not done:
        observation = observation.reshape(shape)
        action = get_action(observation)
        observation, reward, done, _info = environment.step(action)
        total_reward += reward
    return total_reward
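# Objective for cma.fmin: CMA-ES minimizes, so negate the episode reward.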
def f(x):
    set_weights(x)
    return -get_reward()
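# Start the search from the network's randomly initialized weights.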
x0 = get_solution(model.get_weights())
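# Record episodes with the old gym monitor API, run CMA-ES with initial
# step size 1.0 for at most 1e4 reward evaluations (the zero tolerances
# effectively disable cma's convergence-based stopping), then upload the
# recorded results to the Gym scoreboard.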
environment.monitor.start("")
cma.fmin(f, x0, 1.0, {"maxfevals": 1e4, "tolx": 0, "tolfun": 0, "tolfunhist": 0})
environment.monitor.close()
gym.upload("", algorithm_id="alg_Y9Siabv9RMiM7Zvj2YpeEA")