@ryogrid
Created January 22, 2017 10:33
[Run]
git clone https://github.com/CodeReclaimers/neat-python.git
cd neat-python
sudo python setup.py install
# run from the directory that contains mountain_neat.py and neat_mountain.config
python mountain_neat.py
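
Before launching the full NEAT run, it can help to confirm that gym and the
MountainCar-v0 environment import correctly. A minimal smoke test, assuming the
classic gym API this script targets (env.reset() returning only the observation,
env.step() returning a 4-tuple):

import gym

# Build the environment and take one random step to verify the install.
env = gym.make('MountainCar-v0')
observation = env.reset()                    # [position, velocity]
action = env.action_space.sample()           # 0, 1 or 2 (push left / idle / push right)
observation, reward, done, info = env.step(action)
print(observation, reward, done)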
[mountain_neat.py]
from __future__ import print_function
import gym
import numpy as np
import itertools
import os
from neat import nn, population, statistics

np.set_printoptions(threshold=np.inf)

env = gym.make('MountainCar-v0')


# Evaluate every genome in the population: fitness is the best (right-most)
# car position reached during a single episode of at most ~500 frames.
def eval_fitness(genomes):
    for g in genomes:
        observation = env.reset()
        # env.render()
        net = nn.create_feed_forward_phenotype(g)
        fitness = 0
        reward = 0
        total_fitness = 0
        for k in range(1):
            fitness = -100
            frames = 0
            while 1:
                inputs = observation
                # activate the network on the current observation
                output = net.serial_activate(inputs)
                output = np.clip(output, 0, 2)
                # output = np.round(output)
                # print(output)
                observation, reward, done, info = env.step(int(np.array(output)[0]))
                if fitness < observation[0]:
                    fitness = observation[0]
                # env.render()
                frames += 1
                if done or frames > 500:
                    total_fitness += fitness
                    # print(fitness)
                    env.reset()
                    break
        # average fitness over the rollouts (only one here)
        g.fitness = total_fitness / 1
        print(g.fitness)


local_dir = os.path.dirname(__file__)
config_path = os.path.join(local_dir, 'neat_mountain.config')
pop = population.Population(config_path)
pop.run(eval_fitness, 1000)

winner = pop.statistics.best_genome()
del pop
winningnet = nn.create_feed_forward_phenotype(winner)

# record the evaluation episodes with the (old) gym monitor API
env.monitor.start('./mountain-experiment', force=True)

# run the champion network until 100 episodes have reached a total reward
# of at least -1900
streak = 0
episode = 0
best_reward = -9999
while streak < 100:
    fitness = 0
    reward = 0
    observation = env.reset()
    frames = 0
    while 1:
        inputs = observation
        # activate the network on the current observation
        output = winningnet.serial_activate(inputs)
        output = np.clip(output, 0, 2)
        # output = np.round(output)
        # print(output)
        observation, reward, done, info = env.step(int(np.array(output)[0]))
        fitness += reward
        frames += 1
        if done or frames > 500:
            if fitness >= -1900:
                print(fitness)
                print('streak: ', streak)
                streak += 1
            else:
                print(fitness)
                print('streak: ', streak)
            break
    episode += 1
    if fitness > best_reward:
        best_reward = fitness
    print(str(episode) + " " + str(fitness) + " " + str(best_reward))

print("completed!")
env.monitor.close()
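
The script keeps the winning genome only in memory, so a crash during the
evaluation loop means retraining from scratch. An optional sketch for
persisting it with the standard library, assuming the genome returned by
pop.statistics.best_genome() pickles cleanly (not verified here); it would sit
right after the `winner = ...` line above, and the file name is illustrative:

import pickle

# Save the champion genome so evaluation can be re-run without retraining.
with open('winner_genome.pkl', 'wb') as f:
    pickle.dump(winner, f)

# Later, rebuild the network from the saved genome.
with open('winner_genome.pkl', 'rb') as f:
    winner = pickle.load(f)
winningnet = nn.create_feed_forward_phenotype(winner)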
[neat_mountain.config]
# The `Types` section specifies which classes should be used for various
# tasks in the NEAT algorithm. If you use a non-default class here, you
# must register it with your Config instance before loading the config file.
[Types]
stagnation_type = DefaultStagnation
reproduction_type = DefaultReproduction
[phenotype]
input_nodes = 2
hidden_nodes = 0
output_nodes = 1
initial_connection = fs_neat
max_weight = 10
min_weight = -10
feedforward = 0
activation_functions = tanh sigmoid relu identity
weight_stdev = 3
[genetic]
pop_size = 400
max_fitness_threshold = 0.5
prob_add_conn = 0.3
prob_add_node = 0.1
prob_delete_conn = 0.05
prob_delete_node = 0.03
prob_mutate_bias = 0.00109
bias_mutation_power = 0.01
prob_mutate_response = 0.01
response_mutation_power = 0.01
prob_mutate_weight = 0.3
prob_replace_weight = 0.03
weight_mutation_power = 0.1
prob_mutate_activation = 0.01
prob_toggle_link = 0.0138
reset_on_extinction = 1
[genotype compatibility]
compatibility_threshold = 3
excess_coefficient = 1.0
disjoint_coefficient = 1.0
weight_coefficient = 0.4
[DefaultStagnation]
species_fitness_func = mean
max_stagnation = 5
[DefaultReproduction]
elitism = 3
survival_threshold = 0.2
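
The [phenotype] node counts mirror the environment: MountainCar-v0 observations
are two values (position, velocity), hence input_nodes = 2, and the script clips
the single output to [0, 2] and casts it to an int so it lands in the Discrete(3)
action space. A quick check of those space sizes, again assuming the classic gym
API:

import gym

env = gym.make('MountainCar-v0')
print(env.observation_space.shape)   # (2,) -> input_nodes = 2
print(env.action_space.n)            # 3    -> actions 0, 1, 2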