-
-
Save dommueller/8e60f94568ac232f83fdfff48a45467a to your computer and use it in GitHub Desktop.
Using NeuroEvolution of Augmenting Topologies (NEAT) to solve the OpenAI gym cart pole environment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A neural network is trained using NeuroEvolution of Augmenting Topologies | |
# The idea is from the paper: "Evolving Neural Networks through Augmenting Topologies" | |
# This gist is using MultiNEAT (http://multineat.com/) | |
# DISCLAIMER: The learning capabilities are not really shown in this example since the algorithm got lucky with picking the initial weights | |
import logging | |
import os | |
import numpy as np | |
import gym | |
import MultiNEAT as NEAT | |
from MultiNEAT import EvaluateGenomeList_Serial | |
from MultiNEAT import GetGenomeList, ZipFitness | |
# MultiNEAT hyper-parameters: defaults everywhere except the population size.
params = NEAT.Parameters()
# Deliberately tiny population (10 genomes per generation) for a quick demo.
params.PopulationSize = 10
def trainNetwork(env):
    """Evolve a NEAT genome that controls the given CartPole environment.

    Runs up to ``generationSize`` generations of NEAT, scoring each genome
    as the cumulative reward over ``episode_count`` episodes capped at
    ``max_steps`` steps, and returns the best genome found.

    Args:
        env: an OpenAI gym environment with a discrete action space
            (actions chosen by argmax over the network outputs).

    Returns:
        The MultiNEAT genome with the highest fitness in the final
        evaluated generation.
    """
    episode_count = 20
    max_steps = 200
    # Theoretical fitness ceiling: every episode survives the full step cap.
    # Using >= against this derived value instead of == 4000 avoids a magic
    # number and is robust to float accumulation of rewards.
    max_fitness = episode_count * max_steps

    def evaluate(genome):
        """Fitness of a genome = cumulative reward of its phenotype network."""
        net = NEAT.NeuralNetwork()
        genome.BuildPhenotype(net)
        cum_reward = 0
        for _ in range(episode_count):
            ob = env.reset()
            net.Flush()  # reset activations so episodes are independent
            for _ in range(max_steps):
                # Feed the observation through the network; the action is
                # the index of the strongest output neuron.
                net.Input(ob)
                net.Activate()
                action = np.argmax(net.Output())
                ob, reward, done, _ = env.step(action)
                cum_reward += reward
                if done:
                    break
        return cum_reward

    # Initial genome: one input per observation dimension, one output per
    # discrete action, RELU activations throughout.
    g = NEAT.Genome(0, len(env.observation_space.high), 0, env.action_space.n,
                    False, NEAT.ActivationFunction.RELU,
                    NEAT.ActivationFunction.RELU, 0, params)
    seed = 0
    generationSize = 10
    pop = NEAT.Population(g, params, True, 1.0, seed)
    for generation in range(generationSize):
        genome_list = NEAT.GetGenomeList(pop)
        fitness_list = EvaluateGenomeList_Serial(genome_list, evaluate, display=False)
        # Explicit check instead of assert: asserts vanish under python -O.
        if len(genome_list) != len(fitness_list):
            raise RuntimeError("genome/fitness list length mismatch")
        NEAT.ZipFitness(genome_list, fitness_list)
        best = max(fitness_list)
        # Stop early once the fitness ceiling is reached; no genome can improve.
        if best >= max_fitness:
            break
        # Skip the final Epoch so genome_list/fitness_list stay in sync with
        # the population we return from.
        if generation < generationSize - 1:
            pop.Epoch()
    # Return the genome with the highest fitness in the last generation.
    max_genome = max(zip(genome_list, fitness_list), key=lambda item: item[1])
    return max_genome[0]
if __name__ == '__main__':
    # Copied from example/random_agent.py
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    env = gym.make('CartPole-v0')
    outdir = '/tmp/neat-cartpole-results'
    # NOTE(review): env.monitor is the legacy gym Monitor API and was removed
    # in later gym releases — confirm the pinned gym version supports it.
    env.monitor.start(outdir, force=True)

    learned = trainNetwork(env)

    # After the genome is learned, run the evaluation episodes with the
    # phenotype network built from it.
    net = NEAT.NeuralNetwork()
    learned.BuildPhenotype(net)
    episode_count = 200
    max_steps = 200
    for i in range(episode_count):
        ob = env.reset()
        net.Flush()  # reset activations between episodes
        for j in range(max_steps):
            # Choose the action as argmax over the network outputs.
            net.Input(ob)
            net.Activate()
            o = net.Output()
            action = np.argmax(o)
            ob, reward, done, _ = env.step(action)
            if done:
                break

    # Dump result info to disk
    env.monitor.close()

    # Upload to the scoreboard. We could also do this from another
    # process if we wanted.
    logger.info("Successfully ran NEAT. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
    # gym.upload(outdir, algorithm_id='neat')
Reproduced here (198 eps to solve)
Lots of variance when using random seeds:
I'm not sure about using an early break when the upper-bound score for a single agent is reached (20 episodes scoring 200 each), because it is very environment-specific and will break if you change the number of episodes the environment is run per agent.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Reproduced here: https://gym.openai.com/evaluations/eval_w8MhbdYUT52bz7dKQrUvA.
Marking as reviewed!