Last active
August 11, 2016 22:52
-
-
Save StuartFarmer/d9a8dd7676f81a4a5f5c5db66490e9e6 to your computer and use it in GitHub Desktop.
MountainCar with NEAT
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function

import itertools
import os

import gym
import numpy as np
from neat import nn, population, statistics
# Another great example of how crazy effective NEAT can be even when it knows literally nothing about what's going on in the environment.
# Of course, it's all luck in hitting those super fit organisms, so it's best to have a good processor.
# Also, I spent more time tuning the parameters for evolution and the neural net than I spent writing this code. That's the real challenge with NEAT.
# Never abbreviate numpy arrays with "..." when they are printed.
np.set_printoptions(threshold=np.inf)

# One shared environment instance; the fitness evaluation and the final
# validation run below both step and render this same object.
env = gym.make('MountainCar-v0')
# Run through the population, evaluating one genome at a time.
def eval_fitness(genomes):
    """Play one rendered MountainCar episode per genome and set its fitness.

    A feed-forward network is built from each genome and driven by the
    environment's observations until the environment reports ``done`` or
    500 frames have elapsed.  The genome's ``fitness`` attribute is then set
    to ``-(frames ** 2)``, so finishing in fewer frames scores higher.

    Args:
        genomes: iterable of NEAT genomes; ``fitness`` is overwritten on
            each of them.
    """
    max_frames = 500  # hard cap so a hopeless genome cannot run forever

    for g in genomes:
        # Every genome starts from a fresh episode, so no reset is needed
        # after the episode ends.
        observation = env.reset()
        env.render()
        net = nn.create_feed_forward_phenotype(g)

        frames = 0
        done = False
        while not done and frames < max_frames:
            output = net.serial_activate(observation)
            # The first output is rounded and shifted by one to pick an
            # action.  Cast to int (round() returns a float on Python 2) and
            # clamp to 0..2 so an out-of-range network output can never be
            # passed to the environment as an invalid action.
            # NOTE(review): assumes MountainCar-v0 exposes exactly three
            # discrete actions (0, 1, 2) - confirm against the gym version.
            action = min(max(int(round(output[0])) + 1, 0), 2)
            observation, _reward, done, _info = env.step(action)
            env.render()
            frames += 1

        # Quadratic penalty on episode length.
        fitness = -(frames ** 2)
        print(fitness, frames)
        g.fitness = fitness
# Record everything that happens on the environment from here on so the
# results directory can be uploaded once the run is finished.
results_dir = 'gym_results/MountainCar-v0-experiment-1'
env.monitor.start(results_dir, force=True)
try:
    # Evolve a population for up to 300 generations using the config file
    # that sits next to this script.
    local_dir = os.path.dirname(__file__)
    config_path = os.path.join(local_dir, 'mountainCar_config')
    pop = population.Population(config_path)
    pop.run(eval_fitness, 300)

    winner = pop.statistics.best_genome()
    print("Winner found with fitness of ", winner.fitness)
    print("Fitness is novel to this solution. Now testing consecutative trials")

    # Validate the winner: it must finish 100 episodes in a row, each in
    # fewer than 100 frames.
    winningnet = nn.create_feed_forward_phenotype(winner)
    streak = 0
    while streak < 100:
        # Start every episode from the observation returned by reset();
        # reusing the last observation of the previous episode would feed
        # the network a stale state on the first step.
        observation = env.reset()
        env.render()
        frames = 0
        done = False
        while not done and frames < 100:
            output = winningnet.serial_activate(observation)
            # Cast to int and clamp to 0..2 so the environment never
            # receives an invalid action.
            # NOTE(review): assumes three discrete actions (0, 1, 2).
            action = min(max(int(round(output[0])) + 1, 0), 2)
            observation, reward, done, info = env.step(action)
            env.render()
            frames += 1

        if frames >= 100:
            # Ran out of frames: the streak is broken and must restart
            # from zero (a timeout must not be counted as a success).
            streak = 0
        else:
            streak += 1
        print("streak: ", streak)
        fitness = -(frames ** 2)
        print(fitness, frames)
finally:
    # Always close the monitor, even if evolution or validation raised.
    env.monitor.close()

# Secrets do not belong in source control: the key is read from the
# OPENAI_GYM_API_KEY environment variable instead of being hard-coded.
# NOTE(review): any key that was previously committed in this file should be
# treated as leaked and revoked.
gym.upload(results_dir, api_key=os.environ.get('OPENAI_GYM_API_KEY'))
The gif and solve time do not look right compared to some other solutions. Are you sure you were outputting valid actions?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Config file is here:
https://gist.github.com/StuartFarmer/0eb392275e9628498a3d25e6d5fb7d42