-
-
Save dommueller/8e60f94568ac232f83fdfff48a45467a to your computer and use it in GitHub Desktop.
Using NeuroEvolution of Augmenting Topologies (NEAT) to solve the OpenAI gym cart pole environment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A neural network is trained using NeuroEvolution of Augmenting Topologies | |
# The idea is from the paper: "Evolving Neural Networks through Augmenting Topologies" | |
# This gist is using MultiNEAT (http://multineat.com/) | |
# DISCLAIMER: The learning capabilities are not really shown in this example since the algorithm got lucky with picking the initial weights | |
import logging | |
import os | |
import numpy as np | |
import gym | |
import MultiNEAT as NEAT | |
from MultiNEAT import EvaluateGenomeList_Serial | |
from MultiNEAT import GetGenomeList, ZipFitness | |
# MultiNEAT hyper-parameters: defaults everywhere except the population size.
params = NEAT.Parameters()
# Deliberately tiny population (10 genomes per generation) for a quick demo.
params.PopulationSize = 10
def trainNetwork(env):
    """Evolve a NEAT genome that controls the given CartPole environment.

    Runs up to ``generationSize`` generations of NEAT, scoring each genome
    as the cumulative reward over ``episode_count`` episodes capped at
    ``max_steps`` steps, and returns the best genome found.

    Args:
        env: an OpenAI gym environment with a discrete action space
            (actions chosen by argmax over the network outputs).

    Returns:
        The MultiNEAT genome with the highest fitness in the final
        evaluated generation.
    """
    episode_count = 20
    max_steps = 200
    # Theoretical fitness ceiling: every episode survives the full step cap.
    # Using >= against this derived value instead of == 4000 avoids a magic
    # number and is robust to float accumulation of rewards.
    max_fitness = episode_count * max_steps

    def evaluate(genome):
        """Fitness of a genome = cumulative reward of its phenotype network."""
        net = NEAT.NeuralNetwork()
        genome.BuildPhenotype(net)
        cum_reward = 0
        for _ in range(episode_count):
            ob = env.reset()
            net.Flush()  # reset activations so episodes are independent
            for _ in range(max_steps):
                # Feed the observation through the network; the action is
                # the index of the strongest output neuron.
                net.Input(ob)
                net.Activate()
                action = np.argmax(net.Output())
                ob, reward, done, _ = env.step(action)
                cum_reward += reward
                if done:
                    break
        return cum_reward

    # Initial genome: one input per observation dimension, one output per
    # discrete action, RELU activations throughout.
    g = NEAT.Genome(0, len(env.observation_space.high), 0, env.action_space.n,
                    False, NEAT.ActivationFunction.RELU,
                    NEAT.ActivationFunction.RELU, 0, params)
    seed = 0
    generationSize = 10
    pop = NEAT.Population(g, params, True, 1.0, seed)
    for generation in range(generationSize):
        genome_list = NEAT.GetGenomeList(pop)
        fitness_list = EvaluateGenomeList_Serial(genome_list, evaluate, display=False)
        # Explicit check instead of assert: asserts vanish under python -O.
        if len(genome_list) != len(fitness_list):
            raise RuntimeError("genome/fitness list length mismatch")
        NEAT.ZipFitness(genome_list, fitness_list)
        best = max(fitness_list)
        # Stop early once the fitness ceiling is reached; no genome can improve.
        if best >= max_fitness:
            break
        # Skip the final Epoch so genome_list/fitness_list stay in sync with
        # the population we return from.
        if generation < generationSize - 1:
            pop.Epoch()
    # Return the genome with the highest fitness in the last generation.
    max_genome = max(zip(genome_list, fitness_list), key=lambda item: item[1])
    return max_genome[0]
if __name__ == '__main__':
    # Copied from example/random_agent.py
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    env = gym.make('CartPole-v0')
    outdir = '/tmp/neat-cartpole-results'
    # NOTE(review): env.monitor is the legacy gym Monitor API and was removed
    # in later gym releases — confirm the pinned gym version supports it.
    env.monitor.start(outdir, force=True)

    learned = trainNetwork(env)

    # After the genome is learned, run the evaluation episodes with the
    # phenotype network built from it.
    net = NEAT.NeuralNetwork()
    learned.BuildPhenotype(net)
    episode_count = 200
    max_steps = 200
    for i in range(episode_count):
        ob = env.reset()
        net.Flush()  # reset activations between episodes
        for j in range(max_steps):
            # Choose the action as argmax over the network outputs.
            net.Input(ob)
            net.Activate()
            o = net.Output()
            action = np.argmax(o)
            ob, reward, done, _ = env.step(action)
            if done:
                break

    # Dump result info to disk
    env.monitor.close()

    # Upload to the scoreboard. We could also do this from another
    # process if we wanted.
    logger.info("Successfully ran NEAT. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
    # gym.upload(outdir, algorithm_id='neat')
Reproduced here (198 eps to solve)
Lots of variance when using random seeds:
I'm not sure about using an early break when the upper-bound score for a single agent is reached (20 episodes scoring 200 each), because it is very environment-specific and will break if you change the number of episodes the environment is run per agent.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Reproduced here: https://gym.openai.com/evaluations/eval_w8MhbdYUT52bz7dKQrUvA.
Marking as reviewed!