@dommueller
Created April 29, 2016 12:32
Using NeuroEvolution of Augmenting Topologies (NEAT) to solve the OpenAI Gym cart pole environment
# A neural network is trained using NeuroEvolution of Augmenting Topologies (NEAT).
# The idea is from the paper "Evolving Neural Networks through Augmenting Topologies".
# This gist uses MultiNEAT (http://multineat.com/).
# DISCLAIMER: This example does not really demonstrate the learning capabilities
# of the algorithm, since it got lucky with the initial weights.
import logging

import gym
import numpy as np

import MultiNEAT as NEAT
from MultiNEAT import EvaluateGenomeList_Serial
from MultiNEAT import GetGenomeList, ZipFitness

params = NEAT.Parameters()
params.PopulationSize = 10
def trainNetwork(env):
    def evaluate(genome):
        # Build the phenotype (a neural network) from the genome
        net = NEAT.NeuralNetwork()
        genome.BuildPhenotype(net)
        cum_reward = 0
        episode_count = 20
        max_steps = 200
        for i in xrange(episode_count):
            ob = env.reset()
            net.Flush()
            for j in xrange(max_steps):
                # Get the next action: feed in the observation, activate the
                # network and pick the output with the highest activation
                net.Input(ob)
                net.Activate()
                o = net.Output()
                action = np.argmax(o)
                ob, reward, done, _ = env.step(action)
                cum_reward += reward
                if done:
                    break
        return cum_reward
    # Initialize genome: one input per observation dimension,
    # no hidden neurons, one output per discrete action
    g = NEAT.Genome(0, len(env.observation_space.high), 0, env.action_space.n,
                    False, NEAT.ActivationFunction.RELU,
                    NEAT.ActivationFunction.RELU, 0, params)
    seed = 0
    generationSize = 10
    pop = NEAT.Population(g, params, True, 1.0, seed)
    for generation in range(generationSize):
        genome_list = NEAT.GetGenomeList(pop)
        fitness_list = EvaluateGenomeList_Serial(genome_list, evaluate, display=False)
        assert len(genome_list) == len(fitness_list)
        NEAT.ZipFitness(genome_list, fitness_list)
        best = max(fitness_list)
        # 4000 is the maximum possible fitness:
        # 20 episodes * 200 steps * reward of 1 per step
        if best == 4000:
            break
        if generation < generationSize - 1:
            pop.Epoch()
    max_genome = max(zip(genome_list, fitness_list), key=lambda item: item[1])
    return max_genome[0]
if __name__ == '__main__':
    # Copied from example/random_agent.py
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    env = gym.make('CartPole-v0')
    outdir = '/tmp/neat-cartpole-results'
    env.monitor.start(outdir, force=True)

    learned = trainNetwork(env)

    # After the network is trained, run it for 200 evaluation episodes
    net = NEAT.NeuralNetwork()
    learned.BuildPhenotype(net)
    episode_count = 200
    max_steps = 200
    for i in xrange(episode_count):
        ob = env.reset()
        net.Flush()
        for j in xrange(max_steps):
            # Get the next action from the learned network
            net.Input(ob)
            net.Activate()
            o = net.Output()
            action = np.argmax(o)
            ob, reward, done, _ = env.step(action)
            if done:
                break

    # Dump result info to disk
    env.monitor.close()

    # Upload to the scoreboard. We could also do this from another
    # process if we wanted.
    logger.info("Successfully ran NEAT. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
    # gym.upload(outdir, algorithm_id='neat')
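
For reference, CartPole-v0 counts as solved when the average undiscounted return over 100 consecutive episodes reaches 195. A minimal helper in the style of the evaluation loop above (the name average_reward is introduced here, not part of the original gist) for scoring a built network:

def average_reward(net, env, episodes=100, max_steps=200):
    # Average undiscounted return of a built MultiNEAT network
    total = 0.0
    for _ in xrange(episodes):
        ob = env.reset()
        net.Flush()  # reset activations between episodes
        for _ in xrange(max_steps):
            net.Input(ob)
            net.Activate()
            action = np.argmax(net.Output())
            ob, reward, done, _ = env.step(action)
            total += reward
            if done:
                break
    return total / episodes

Called as average_reward(net, env) after BuildPhenotype, a result of 195 or more would count as solved for CartPole-v0.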
gdb commented May 1, 2016

Reproduced here: https://gym.openai.com/evaluations/eval_w8MhbdYUT52bz7dKQrUvA.

Marking as reviewed!

@JKCooper2

Reproduced here (198 eps to solve).
Lots of variance when using random seeds.

I'm not sure about using an early break when the upper-bound score for a single agent is reached (20 eps scoring 200 per ep), because it is very environment-specific and will also break if you change how many times the environment is run per agent; a less brittle variant is sketched below.
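
One way to soften that brittleness (a sketch, not part of the original gist) is to derive the bound from the evaluation settings instead of hardcoding 4000:

# Fitness ceiling per genome: every step in every episode yields reward 1,
# so the maximum is episodes * step cap (4000 for 20 episodes of 200 steps).
episode_count = 20   # must match episode_count in evaluate()
max_steps = 200      # must match max_steps in evaluate()
max_fitness = episode_count * max_steps

# inside the generation loop, replacing `if best == 4000: break`:
if best >= max_fitness:
    break

This only removes the dependence on the per-agent episode count; stopping at a perfect score still assumes an environment with a bounded per-step reward.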
