Skip to content

Instantly share code, notes, and snippets.

@LucasColas
Forked from CodeReclaimers/config
Created November 24, 2019 10:59
Show Gist options
  • Save LucasColas/d7c7aa9c0b720eca44475ab54d8b1d56 to your computer and use it in GitHub Desktop.
Save LucasColas/d7c7aa9c0b720eca44475ab54d8b1d56 to your computer and use it in GitHub Desktop.
OpenAI Gym LunarLander-v2 writeup
# neat-python configuration for the LunarLander-v2 environment on OpenAI Gym
# Sample run here: https://gym.openai.com/evaluations/eval_FbKq5MxAS9GlvB7W6ioJkg
# NOTE: This was run using revision 1186029827c156e0ff6f9b36d6847eb2aa56757a of CodeReclaimers/neat-python, not a release on PyPI.
[NEAT]
pop_size = 150
# Note: the fitness threshold will never be reached because
# we are controlling the termination ourselves based on simulation performance.
fitness_criterion = max
fitness_threshold = 1000.0
reset_on_extinction = 0
[DefaultGenome]
num_inputs = 8
num_hidden = 0
num_outputs = 4
initial_connection = full
feed_forward = True
compatibility_disjoint_coefficient = 1.0
compatibility_weight_coefficient = 1.0
conn_add_prob = 0.15
conn_delete_prob = 0.1
node_add_prob = 0.15
node_delete_prob = 0.1
activation_default = clamped
activation_options = clamped
activation_mutate_rate = 0.0
aggregation_default = sum
aggregation_options = sum
aggregation_mutate_rate = 0.0
bias_init_mean = 0.0
bias_init_stdev = 1.0
bias_replace_rate = 0.02
bias_mutate_rate = 0.8
bias_mutate_power = 0.4
bias_max_value = 30.0
bias_min_value = -30.0
response_init_mean = 1.0
response_init_stdev = 0.0
response_replace_rate = 0.0
response_mutate_rate = 0.1
response_mutate_power = 0.01
response_max_value = 30.0
response_min_value = -30.0
weight_max_value = 30
weight_min_value = -30
weight_init_mean = 0.0
weight_init_stdev = 1.0
weight_mutate_rate = 0.8
weight_replace_rate = 0.02
weight_mutate_power = 0.4
enabled_default = True
enabled_mutate_rate = 0.01
[DefaultSpeciesSet]
compatibility_threshold = 3.0
[DefaultStagnation]
species_fitness_func = mean
max_stagnation = 15
species_elitism = 4
[DefaultReproduction]
elitism = 2
survival_threshold = 0.2
# Evolve a control/reward estimation network for the OpenAI Gym
# LunarLander-v2 environment (https://gym.openai.com/envs/LunarLander-v2).
# Sample run here: https://gym.openai.com/evaluations/eval_FbKq5MxAS9GlvB7W6ioJkg
# NOTE: This was run using revision 1186029827c156e0ff6f9b36d6847eb2aa56757a of CodeReclaimers/neat-python, not a release on PyPI.
from __future__ import print_function
import gym
import gym.wrappers
import matplotlib.pyplot as plt
import multiprocessing
import neat
import numpy as np
import os
import pickle
import random
import time
import visualize
# Create the Gym environment that every genome is evaluated in, and log its
# action/observation spaces for reference.
env = gym.make('LunarLander-v2')
print("action space: {0!r}".format(env.action_space))
print("observation space: {0!r}".format(env.observation_space))
# Limit episode time steps to cut down on training time.
# 400 steps is more than enough time to land with a winning score.
# The configured limit is printed before and after the override so the change
# is visible in the log.
# NOTE(review): this edits env.spec.tags after gym.make() has already built the
# environment -- confirm the installed gym version still honours the new value.
print(env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps'))
env.spec.tags['wrapper_config.TimeLimit.max_episode_steps'] = 400
print(env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps'))
# Wrap the environment in gym's Monitor, pointed at the 'results' directory.
# NOTE(review): force=True presumably allows overwriting earlier output in that
# directory -- confirm against the gym Monitor documentation.
env = gym.wrappers.Monitor(env, 'results', force=True)
# Per-step discount factor used when turning raw step rewards into discounted
# returns (it is raised to the step offset in evaluate_genomes).
discounted_reward = 0.9
# Bounds used to linearly rescale discounted returns so that
# [min_reward, max_reward] maps onto [-1, 1].
min_reward = -200
max_reward = 200
# One (min, mean, max) tuple of episode scores is appended per
# evaluate_genomes() call; run() plots these as score-ranges.svg.
score_range = []
def compute_fitness(net, discounted_rewards, episodes):
    """Return the squared reward-estimation error of `net` for every recorded step.

    `episodes` is a sequence of episodes, each a sequence of
    ``(step, observation, action, reward)`` tuples.  `discounted_rewards`
    is paired with `episodes` positionally: ``discounted_rewards[i][t]`` is
    the target value for step ``t`` of ``episodes[i]``.  Both pairings use
    ``zip``, so any surplus entries on either side are silently ignored.

    For each step the network is activated on the recorded observation and
    the output at the index of the action that was taken is compared with
    the target.  The result is a flat list of Python floats, one per step,
    in episode order.
    """
    return [
        float((net.activate(observation)[action] - target) ** 2)
        for targets, steps in zip(discounted_rewards, episodes)
        for (_, observation, action, _), target in zip(steps, targets)
    ]
class PooledErrorCompute(object):
    """Fitness evaluator combining flight score with reward-estimation accuracy.

    Each genome flies one episode in the module-level ``env``; its raw
    fitness is the episode's total reward.  A penalty proportional to the
    mean squared error of the genome's reward estimates (computed by
    ``compute_fitness`` in a ``multiprocessing`` pool) is then subtracted.
    """

    def __init__(self):
        # Worker pool used to compute the estimation errors in parallel.
        self.pool = multiprocessing.Pool()

    @staticmethod
    def _discounted_returns(rewards, discount):
        """Return G[t] = sum over k >= t of discount ** (k - t) * rewards[k].

        Uses the backward recurrence G[t] = rewards[t] + discount * G[t + 1],
        which is linear in the episode length.
        """
        returns = np.zeros(len(rewards))
        running = 0.0
        for t in range(len(rewards) - 1, -1, -1):
            running = rewards[t] + discount * running
            returns[t] = running
        return returns

    def evaluate_genomes(self, genomes, config):
        """Assign ``fitness`` to every genome in `genomes`.

        `genomes` is a re-iterable sequence of ``(genome_id, genome)`` pairs
        and `config` is the neat configuration used to build the networks.
        Nothing is returned; each genome's ``fitness`` attribute is set, and
        a ``(min, mean, max)`` tuple of this generation's episode scores is
        appended to the module-level ``score_range``.
        """
        t0 = time.time()
        nets = [(g, neat.nn.FeedForwardNetwork.create(g, config))
                for gid, g in genomes]
        print("network creation time {0}".format(time.time() - t0))
        t0 = time.time()

        # Fly one episode per genome, always taking the action whose network
        # output is largest.
        episodes = []
        for genome, net in nets:
            observation = env.reset()
            episode_data = []
            j = 0
            total_score = 0.0
            while 1:
                output = net.activate(observation)
                action = np.argmax(output)
                observation, reward, done, info = env.step(action)
                total_score += reward
                # NOTE(review): the observation recorded here is the one
                # returned by the step, i.e. the state *after* `action` was
                # taken -- confirm this is the intended training input.
                episode_data.append((j, observation, action, reward))
                if done:
                    break
                j += 1

            episodes.append((total_score, episode_data))
            genome.fitness = total_score

        print("simulation run time {0}".format(time.time() - t0))
        t0 = time.time()

        scores = [s for s, e in episodes]
        score_range.append((min(scores), np.mean(scores), max(scores)))

        # Compute discounted rewards, one array per episode in episode order.
        discounted_rewards = []
        for score, episode in episodes:
            rewards = np.array([reward for j, observation, action, reward in episode])
            discounted_rewards.append(
                self._discounted_returns(rewards, discounted_reward))

        print(min(map(np.min, discounted_rewards)), max(map(np.max, discounted_rewards)))

        # Normalize rewards so [min_reward, max_reward] maps onto [-1, 1].
        reward_span = max_reward - min_reward
        discounted_rewards = [2 * (dr - min_reward) / reward_span - 1.0
                              for dr in discounted_rewards]

        print(min(map(np.min, discounted_rewards)), max(map(np.max, discounted_rewards)))

        print("discounted & normalized reward compute time {0}".format(time.time() - t0))
        t0 = time.time()

        # Randomly choose a subset of episodes for evaluating reward
        # estimation.  Indices are sampled (with replacement) so each chosen
        # episode is handed over together with *its own* discounted rewards;
        # compute_fitness pairs the two lists positionally.
        sample = [random.randrange(len(episodes)) for _ in range(10)]
        comparison_episodes = [episodes[i][1] for i in sample]
        comparison_rewards = [discounted_rewards[i] for i in sample]

        jobs = []
        for genome, net in nets:
            jobs.append(self.pool.apply_async(
                compute_fitness, (net, comparison_rewards, comparison_episodes)))

        # Assign a composite fitness to each genome; genomes can make progress either
        # by improving their total reward or by making more accurate reward estimates.
        # `jobs` was built in the same order as `genomes`, so zip pairs them correctly.
        for job, (genome_id, genome) in zip(jobs, genomes):
            reward_error = job.get(timeout=None)
            genome.fitness -= 150 * np.mean(reward_error)

        print("final fitness compute time {0}\n".format(time.time() - t0))
def run():
    """Evolve lander controllers until an ensemble of the best genomes solves the task.

    One generation is evolved per loop iteration.  After each generation the
    fitness and score plots are refreshed and the five best unique genomes
    seen so far are flown as an ensemble for up to 100 episodes; the trial
    stops early as soon as the running average score drops below 200.  If
    all 100 episodes keep the average at or above 200 the genomes are
    pickled, their networks are drawn, and the function returns.  Ctrl-C
    stops the run.  The environment is closed on every exit path.
    """
    # Load the config file, which is assumed to live in
    # the same directory as this script.
    local_dir = os.path.dirname(__file__)
    config_path = os.path.join(local_dir, 'config')
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)

    pop = neat.Population(config)
    stats = neat.StatisticsReporter()
    pop.add_reporter(stats)
    pop.add_reporter(neat.StdOutReporter(True))
    # Checkpoint every 10 generations or 900 seconds.
    pop.add_reporter(neat.Checkpointer(10, 900))

    # Run until the winner from a generation is able to solve the environment
    # or the user interrupts the process.
    ec = PooledErrorCompute()
    try:
        while 1:
            pop.run(ec.evaluate_genomes, 1)

            visualize.plot_stats(stats, ylog=False, view=False, filename="fitness.svg")

            if score_range:
                # Rows after transposing: min, mean and max score per generation.
                S = np.array(score_range).T
                plt.plot(S[0], 'r-')
                plt.plot(S[1], 'b-')
                plt.plot(S[2], 'g-')
                plt.grid()
                plt.savefig("score-ranges.svg")
                plt.close()

            # Average over however many of the last five generations exist,
            # so early generations are not diluted by a fixed divisor of 5.
            mfs = _recent_mean(stats.get_fitness_mean())
            print("Average mean fitness over last 5 generations: {0}".format(mfs))

            mfs = _recent_mean(stats.get_fitness_stat(min))
            print("Average min fitness over last 5 generations: {0}".format(mfs))

            # Use the five best genomes seen so far as an ensemble-ish control system.
            best_genomes = stats.best_unique_genomes(5)
            best_networks = [neat.nn.FeedForwardNetwork.create(g, config)
                             for g in best_genomes]

            solved = True
            best_scores = []
            for k in range(100):
                observation = env.reset()
                score = 0
                while 1:
                    # Use the total reward estimates from all five networks to
                    # determine the best action given the current state.
                    total_rewards = np.zeros((4,))
                    for n in best_networks:
                        output = n.activate(observation)
                        total_rewards += output

                    best_action = np.argmax(total_rewards)
                    observation, reward, done, info = env.step(best_action)
                    score += reward
                    env.render()
                    if done:
                        break

                best_scores.append(score)
                avg_score = sum(best_scores) / len(best_scores)
                print(k, score, avg_score)
                # Give up on this trial as soon as the running average
                # falls below the solving threshold.
                if avg_score < 200:
                    solved = False
                    break

            if solved:
                print("Solved.")

                # Save the winners.  Every output file carries the
                # per-winner prefix so winners do not overwrite each other.
                for n, g in enumerate(best_genomes):
                    name = 'winner-{0}'.format(n)
                    with open(name + '.pickle', 'wb') as f:
                        pickle.dump(g, f)

                    visualize.draw_net(config, g, view=False, filename=name + "-net.gv")
                    visualize.draw_net(config, g, view=False, filename=name + "-net-enabled.gv",
                                       show_disabled=False)
                    visualize.draw_net(config, g, view=False, filename=name + "-net-enabled-pruned.gv",
                                       show_disabled=False, prune_unused=True)

                break
    except KeyboardInterrupt:
        print("User break.")
    finally:
        # Close the environment on every exit path, including unexpected errors.
        env.close()


def _recent_mean(values, window=5):
    """Return the mean of the last `window` entries of `values` (0.0 if empty)."""
    recent = values[-window:]
    if not recent:
        return 0.0
    return sum(recent) / float(len(recent))


if __name__ == '__main__':
    run()
from __future__ import print_function
import copy
import warnings
import graphviz
import matplotlib.pyplot as plt
import numpy as np
def plot_stats(statistics, ylog=False, view=False, filename='avg_fitness.svg'):
    """Plot average, average-plus-one-stdev and best fitness per generation.

    `statistics` must expose ``most_fit_genomes``, ``get_fitness_mean()`` and
    ``get_fitness_stdev()``.  The figure is saved to `filename`; it is also
    shown interactively when `view` is true.  With `ylog` set the y axis uses
    a 'symlog' scale.  Returns None, after only a warning if matplotlib is
    unavailable.
    """
    if plt is None:
        warnings.warn("This display is not available due to a missing optional dependency (matplotlib)")
        return

    champions = statistics.most_fit_genomes
    xs = range(len(champions))
    best_curve = [genome.fitness for genome in champions]
    mean_curve = np.array(statistics.get_fitness_mean())
    spread = np.array(statistics.get_fitness_stdev())

    # The "-1 sd" curve is intentionally not drawn.
    series = (
        (mean_curve, 'b-', "average"),
        (mean_curve + spread, 'g-.', "+1 sd"),
        (best_curve, 'r-', "best"),
    )
    for ys, line_style, label in series:
        plt.plot(xs, ys, line_style, label=label)

    plt.title("Population's average and best fitness")
    plt.xlabel("Generations")
    plt.ylabel("Fitness")
    plt.grid()
    plt.legend(loc="best")
    if ylog:
        plt.gca().set_yscale('symlog')

    plt.savefig(filename)
    if view:
        plt.show()

    plt.close()
def plot_species(statistics, view=False, filename='speciation.svg'):
    """Draw a stacked-area chart of species sizes across generations.

    `statistics.get_species_sizes()` supplies one row of species sizes per
    generation.  The figure is saved to `filename` and additionally shown
    when `view` is true.  Returns None, after only a warning if matplotlib
    is unavailable.
    """
    if plt is None:
        warnings.warn("This display is not available due to a missing optional dependency (matplotlib)")
        return

    sizes_by_generation = statistics.get_species_sizes()
    generations = range(len(sizes_by_generation))
    # Transpose so that each row holds one species' size over time.
    per_species = np.array(sizes_by_generation).T

    _, axes = plt.subplots()
    axes.stackplot(generations, *per_species)

    plt.title("Speciation")
    plt.ylabel("Size per Species")
    plt.xlabel("Generations")

    plt.savefig(filename)
    if view:
        plt.show()

    plt.close()
def draw_net(config, genome, view=False, filename=None, node_names=None, show_disabled=True, prune_unused=False,
             node_colors=None, fmt='svg'):
    """Render `genome` as a Graphviz digraph and return the graph object.

    Parameters:
        config: neat configuration; ``config.genome_config.input_keys`` and
            ``output_keys`` identify the input and output nodes.
        genome: genome whose ``nodes`` and ``connections`` are drawn.
        view: passed to ``Digraph.render`` to open the rendered file.
        filename: output path passed to ``Digraph.render``.
        node_names: optional dict mapping node key -> display name.
        show_disabled: draw disabled connections (dotted) as well.
        prune_unused: draw only hidden nodes and connections that lie on a
            path to an output node.
        node_colors: optional dict mapping node key -> fill colour.
        fmt: Graphviz output format.

    Returns the ``graphviz.Digraph``, or None (after a warning) when the
    graphviz package is unavailable.
    """
    if graphviz is None:
        warnings.warn("This display is not available due to a missing optional dependency (graphviz)")
        return

    if node_names is None:
        node_names = {}
    assert isinstance(node_names, dict)

    if node_colors is None:
        node_colors = {}
    assert isinstance(node_colors, dict)

    # Default attributes for network nodes.
    node_attrs = {
        'shape': 'circle',
        'fontsize': '9',
        'height': '0.2',
        'width': '0.2'}

    dot = graphviz.Digraph(format=fmt, node_attr=node_attrs)

    inputs = set()
    for k in config.genome_config.input_keys:
        print("input %d" % k)
        inputs.add(k)
        input_attrs = {'style': 'filled',
                       'shape': 'box',
                       'fillcolor': node_colors.get(k, 'lightgray')}
        dot.node(node_names.get(k, str(k)), _attributes=input_attrs)

    outputs = set()
    for k in config.genome_config.output_keys:
        print("output %d" % k)
        outputs.add(k)
        output_attrs = {'style': 'filled',
                        'fillcolor': node_colors.get(k, 'lightblue')}
        dot.node(node_names.get(k, str(k)), _attributes=output_attrs)

    # Connections that are candidates for drawing.
    visible = [cg for cg in genome.connections.values()
               if cg.enabled or show_disabled]

    if prune_unused:
        # Walk backwards from the outputs along visible connections to find
        # every node that can influence an output.
        links = set(cg.key for cg in visible)
        used_nodes = copy.copy(outputs)
        pending = copy.copy(outputs)
        while pending:
            new_pending = set()
            for a, b in links:
                if b in pending and a not in used_nodes:
                    new_pending.add(a)
                    used_nodes.add(a)
            pending = new_pending
    else:
        used_nodes = set(genome.nodes.keys())

    for n in used_nodes:
        if n in inputs or n in outputs:
            continue
        print("hidden %d" % n)
        attrs = {'style': 'filled',
                 'fillcolor': node_colors.get(n, 'white')}
        # Use the same display name the edges use, so a renamed hidden node
        # is not drawn twice.
        dot.node(node_names.get(n, str(n)), _attributes=attrs)

    for cg in visible:
        src, dst = cg.key
        # When pruning, drop edges touching pruned nodes; otherwise Graphviz
        # would implicitly re-create those nodes.
        if prune_unused and (src not in used_nodes or dst not in used_nodes):
            continue
        a = node_names.get(src, str(src))
        b = node_names.get(dst, str(dst))
        style = 'solid' if cg.enabled else 'dotted'
        color = 'green' if cg.weight > 0 else 'red'
        width = str(0.1 + abs(cg.weight / 5.0))
        dot.edge(a, b, _attributes={'style': style, 'color': color, 'penwidth': width})

    dot.render(filename, view=view)

    return dot
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment