Skip to content

Instantly share code, notes, and snippets.

@ryogrid
Last active February 22, 2017 01:31
Show Gist options
  • Save ryogrid/078f6baf539be03c85c4b836d5cfcc53 to your computer and use it in GitHub Desktop.
Save ryogrid/078f6baf539be03c85c4b836d5cfcc53 to your computer and use it in GitHub Desktop.
agent using NEAT method
This code is modeled on https://gist.github.com/stefanopalmieri/2efec0e09c14de06fb93fbb91e18a93a
[Run]
git clone https://github.com/CodeReclaimers/neat-python.git
cd neat-python
sudo python setup.py install
# run from the directory that contains the script and xor2.config
python walker_neat.py
from __future__ import print_function
import gym
import numpy as np
import itertools
import os
from neat import nn, population, statistics
# Never abbreviate printed NumPy arrays: with an infinite threshold, arrays
# are always printed in full instead of being summarized with "...".
np.set_printoptions(threshold=np.inf)
# Single Gym environment instance shared by eval_fitness (training) and by
# the final evaluation loop at the bottom of the script.
env = gym.make('BipedalWalker-v2')
# run through the population
def eval_fitness(genomes, episodes=5, max_frames=2000):
    """Score every genome by letting its network drive the environment.

    Each genome is converted to a feed-forward network and run in the
    module-level ``env`` for ``episodes`` independent episodes.  The mean of
    the per-episode total rewards is stored on the genome's ``fitness``
    attribute and printed.

    Args:
        genomes: iterable of NEAT genomes; each one has ``fitness`` assigned.
        episodes: number of episodes averaged per genome.
        max_frames: per-episode step budget; an episode is cut off once more
            than this many steps have been taken without ``done``.
    """
    for g in genomes:
        net = nn.create_feed_forward_phenotype(g)
        total_fitness = 0.0

        for _ in range(episodes):
            # Every episode starts from a freshly reset environment with its
            # own reward and frame counters.  Keeping these per-episode (and
            # using the observation returned by reset) is what makes the
            # final value a true average over independent episodes.
            observation = env.reset()
            fitness = 0.0
            frames = 0

            while True:
                # Activate the network on the current observation and clamp
                # its outputs to [-1, 1] before using them as the action.
                output = net.serial_activate(observation)
                output = np.clip(output, -1, 1)
                observation, reward, done, info = env.step(np.array(output))

                fitness += reward
                frames += 1

                if done or frames > max_frames:
                    break

            total_fitness += fitness

        # Mean episode reward (total_fitness is a float, so this is true
        # division on Python 2 as well).
        g.fitness = total_fitness / episodes
        print(g.fitness)
# Look for the NEAT configuration file alongside this script.
local_dir = os.path.dirname(__file__)
config_path = os.path.join(local_dir, 'xor2.config')

# Build the population from the config and evolve it, scoring genomes with
# eval_fitness; 1000 is the generation count handed to run().
pop = population.Population(config_path)
pop.run(eval_fitness, 1000)

# Keep only the best genome; the population itself is no longer needed.
winner = pop.statistics.best_genome()
del pop

winningnet = nn.create_feed_forward_phenotype(winner)

# Record the evaluation run.  try/finally guarantees the monitor is closed
# even if an episode raises part-way through.
env.monitor.start('./walker-experiment', force=True)
try:
    streak = 0
    episode = 0
    best_reward = -200

    # Keep running episodes until the winner scores >= 100 in 100
    # consecutive episodes.
    while streak < 100:
        fitness = 0
        frames = 0
        observation = env.reset()

        while True:
            # Activate the network on the current observation and clamp its
            # outputs to [-1, 1] before using them as the action.
            output = winningnet.serial_activate(observation)
            output = np.clip(output, -1, 1)
            observation, reward, done, info = env.step(np.array(output))

            fitness += reward
            frames += 1

            if done or frames > 2000:
                break

        # Report the episode result and the streak as it stood before this
        # episode, then extend the streak on success or reset it on failure.
        print(fitness)
        print('streak: ', streak)
        if fitness >= 100:
            streak += 1
        else:
            streak = 0

        episode += 1
        if fitness > best_reward:
            best_reward = fitness
        print(str(episode) + " " + str(fitness) + " " + str(best_reward))

    print("completed!")
finally:
    env.monitor.close()
# The `Types` section specifies which classes should be used for various
# tasks in the NEAT algorithm. If you use a non-default class here, you
# must register it with your Config instance before loading the config file.
[Types]
stagnation_type = DefaultStagnation
reproduction_type = DefaultReproduction
[phenotype]
input_nodes = 24
hidden_nodes = 0
output_nodes = 4
initial_connection = fs_neat
max_weight = 10
min_weight = -10
feedforward = 0
activation_functions = tanh sigmoid relu identity
weight_stdev = 3
[genetic]
pop_size = 400
max_fitness_threshold = 130
prob_add_conn = 0.3
prob_add_node = 0.1
prob_delete_conn = 0.05
prob_delete_node = 0.03
prob_mutate_bias = 0.00109
bias_mutation_power = 0.01
prob_mutate_response = 0.01
response_mutation_power = 0.01
prob_mutate_weight = 0.3
prob_replace_weight = 0.03
weight_mutation_power = 0.1
prob_mutate_activation = 0.01
prob_toggle_link = 0.0138
reset_on_extinction = 1
[genotype compatibility]
compatibility_threshold = 3
excess_coefficient = 1.0
disjoint_coefficient = 1.0
weight_coefficient = 0.4
[DefaultStagnation]
species_fitness_func = mean
max_stagnation = 5
[DefaultReproduction]
elitism = 3
survival_threshold = 0.2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment