Skip to content

Instantly share code, notes, and snippets.

@StuartFarmer
Last active August 11, 2016 22:52
Show Gist options
  • Save StuartFarmer/d9a8dd7676f81a4a5f5c5db66490e9e6 to your computer and use it in GitHub Desktop.
Save StuartFarmer/d9a8dd7676f81a4a5f5c5db66490e9e6 to your computer and use it in GitHub Desktop.
MountainCar with NEAT
from __future__ import print_function
import gym
import numpy as np
import itertools
import os
from neat import nn, population, statistics
# Another great example of how effective NEAT can be even when it knows literally nothing about what is going on in the environment.
# Of course, hitting those super-fit organisms is all luck, so it's best to have a good processor.
# Also, I spent more time tuning the parameters for evolution and the neural net than I spent writing this code; that is the real challenge with NEAT.
# One shared MountainCar environment for the whole script.
env = gym.make('MountainCar-v0')
# Print numpy arrays in full instead of abbreviating long ones with "...".
np.set_printoptions(threshold=np.inf)
# run through the population
def eval_fitness(genomes, max_frames=500):
    """Score every genome by playing one rendered episode in the global ``env``.

    For each genome a feed-forward network is built, driven with the raw
    observation at every step, and run until the environment reports ``done``
    or ``max_frames`` steps have elapsed. The result is stored on the genome
    as ``g.fitness = -(frames ** 2)``, so shorter episodes score higher.

    Args:
        genomes: iterable of genomes accepted by
            ``nn.create_feed_forward_phenotype``; each one gets its
            ``fitness`` attribute set.
        max_frames: step cap per episode (defaults to the original 500).
    """
    for g in genomes:
        # Every genome starts from a fresh episode, so no extra reset is
        # needed once the episode ends.
        observation = env.reset()
        env.render()
        net = nn.create_feed_forward_phenotype(g)
        frames = 0
        done = False
        while not done:
            output = net.serial_activate(observation)
            # Shift the rounded output so 0 maps to action 1, then coerce to
            # an int and clamp to 0..2: MountainCar-v0 only accepts the
            # discrete actions 0, 1 and 2.
            action = min(2, max(0, int(round(output[0])) + 1))
            observation, reward, done, info = env.step(action)
            env.render()
            frames += 1
            if frames >= max_frames:
                # Give up on this genome; it is scored with the full cap.
                done = True
        # Quadratic penalty on episode length.
        fitness = -(frames ** 2)
        print(fitness, frames)
        g.fitness = fitness
# Where the gym monitor writes its recordings (also the directory uploaded).
RESULTS_DIR = 'gym_results/MountainCar-v0-experiment-1'
GENERATIONS = 300        # generations of evolution to run
REQUIRED_STREAK = 100    # consecutive successful trials demanded of the winner
TRIAL_FRAME_LIMIT = 100  # a trial that reaches this many frames is a failure

env.monitor.start(RESULTS_DIR, force=True)
try:
    local_dir = os.path.dirname(__file__)
    config_path = os.path.join(local_dir, 'mountainCar_config')
    pop = population.Population(config_path)
    pop.run(eval_fitness, GENERATIONS)
    winner = pop.statistics.best_genome()
    print("Winner found with fitness of ", winner.fitness)
    print("Fitness is novel to this solution. Now testing consecutative trials")

    streak = 0
    observation = env.reset()
    env.render()
    winningnet = nn.create_feed_forward_phenotype(winner)
    # NOTE(review): this loop only ends once the winner finishes
    # REQUIRED_STREAK trials in a row in under TRIAL_FRAME_LIMIT frames;
    # a weaker winner will keep it running until interrupted.
    while streak < REQUIRED_STREAK:
        frames = 0
        done = False
        while not done:
            output = winningnet.serial_activate(observation)
            # Coerce to an int and clamp to 0..2: MountainCar-v0 only
            # accepts the discrete actions 0, 1 and 2.
            action = min(2, max(0, int(round(output[0])) + 1))
            observation, reward, done, info = env.step(action)
            env.render()
            frames += 1
            if frames >= TRIAL_FRAME_LIMIT:
                done = True
        if frames >= TRIAL_FRAME_LIMIT:
            # Failed trial: the streak restarts from zero. Previously the
            # reset was immediately followed by an increment, so a failure
            # still counted as one success.
            streak = 0
        else:
            streak += 1
        print("streak: ", streak)
        fitness = -(frames ** 2)
        print(fitness, frames)
        # Keep the fresh observation so the next trial does not start from
        # the previous episode's terminal state.
        observation = env.reset()
finally:
    # Close the monitor even if evolution or the trials raise or are
    # interrupted, so the recordings are finalized.
    env.monitor.close()

# The API key is a credential and must not be committed to source; it is
# read from the environment instead.
gym.upload(RESULTS_DIR, api_key=os.environ.get('OPENAI_GYM_API_KEY'))
@StuartFarmer
Copy link
Author

StuartFarmer commented Aug 7, 2016

@evangravelle
Copy link

The gif and solve time do not look right; compare them to some other solutions. Are you sure you were outputting valid actions?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment