Skip to content

Instantly share code, notes, and snippets.

@PuZZleDucK
Created May 27, 2016 18:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PuZZleDucK/4a6b877964a0e67648b88bee05eeebf2 to your computer and use it in GitHub Desktop.
Save PuZZleDucK/4a6b877964a0e67648b88bee05eeebf2 to your computer and use it in GitHub Desktop.
Simple Genetic Algorithm Atari simulation for the OpenAI Gym
#!/usr/bin/python
# Simple Genetic Algorithm Atari simulation
import gym
import numpy as np
import sys
from pybrain.tools.shortcuts import buildNetwork
import random
genisis = "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
# Number of mutants generated and evaluated in every generation.
children_per_gen = 200
# How many genomes get_survivors() keeps (from the tail of the score-sorted
# previous generation) to act as parents of the next one.
survivors_per_gen = 10
# Number of generations the main loop runs.
generations = 100
# Upper bound on environment steps for a single mutant's evaluation.
max_time = 15000
def mutate_single_gene(action_string):
    """Return a copy of ``action_string`` with one gene overwritten.

    A random digit 0-5 replaces the character at a randomly chosen index,
    so the length of the genome is unchanged. The replacement may equal the
    character it overwrites, in which case the result equals the input.
    """
    replacement = str(random.choice(range(6)))
    position = random.choice(range(len(action_string)))
    head = action_string[:position]
    tail = action_string[position + 1:]
    return "".join((head, replacement, tail))
def mutate_add_genome(action_string):
    """Return ``action_string`` with five new random genes spliced in.

    Five digits, each drawn from 0-5, are generated first; then an insertion
    index is drawn from ``0 .. len(action_string) - 1``. Because the index is
    never ``len(action_string)``, the new genes are always placed before the
    final existing gene. The result is five characters longer than the input.
    """
    fresh_genes = "".join(str(random.choice(range(6))) for _ in range(5))
    insert_at = random.choice(range(len(action_string)))
    prefix, suffix = action_string[:insert_at], action_string[insert_at:]
    return prefix + fresh_genes + suffix
def mutate_duplicate_genome(action_string):
    """Return ``action_string`` with a copied run of genes inserted.

    A run of up to five characters is copied starting at one random index
    (fewer than five when the index is near the end of the string) and the
    copy is inserted at a second, independently chosen random index.
    """
    source_start = random.choice(range(len(action_string)))
    insert_at = random.choice(range(len(action_string)))
    copied = action_string[source_start:source_start + 5]
    return "".join([action_string[:insert_at], copied, action_string[insert_at:]])
def mutate_delete_genome(action_string):
    """Return ``action_string`` with a run of up to five genes removed.

    The run starts at a random index; fewer than five genes are removed when
    the index is within five characters of the end.

    The result is never empty for a non-empty input: if the deletion would
    remove every gene, the input is returned unchanged. An empty genome
    cannot be mutated further (there is no index to pick) and cannot be
    played back with ``step % len(genome)``. An empty input is also returned
    unchanged instead of raising ``IndexError``.
    """
    if not action_string:
        return action_string
    delete_location = random.choice(range(len(action_string)))
    shortened = action_string[:delete_location] + action_string[delete_location + 5:]
    # Refuse a deletion that would wipe out the whole genome.
    return shortened if shortened else action_string
def mutate_scramble_genome(action_string):
    """Return ``action_string`` with a window of up to five genes reordered.

    The window starts at a random index and spans at most five characters
    (fewer near the end of the string). Its characters are put back in a
    random order, so the genome keeps its length and its multiset of genes.
    """
    start = random.choice(range(len(action_string)))
    stop = start + 5
    window = action_string[start:stop]
    shuffled = "".join(random.sample(window, len(window)))
    return action_string[:start] + shuffled + action_string[stop:]
def mutate(action_string):
    """Apply one randomly selected mutation operator to ``action_string``.

    Operator probabilities:
        10% - no mutation (input returned unchanged)
        30% - single gene mutation
        20% - add random genome
        10% - replicate genome
        10% - delete genome
        20% - shuffle genome

    Returns the (possibly) mutated action string.
    """
    # Draw from 1..10 inclusive. The thresholds below were written for that
    # range; drawing from 0..9 would give "no mutation" 20% and "shuffle"
    # only 10%, contradicting the documented split.
    mutation_type = random.randint(1, 10)
    if mutation_type <= 1:  # 10% - no mutation
        return action_string
    if mutation_type <= 4:  # 30% - single gene mutation
        return mutate_single_gene(action_string)
    if mutation_type <= 6:  # 20% - add random genome
        return mutate_add_genome(action_string)
    if mutation_type <= 7:  # 10% - replicate genome
        return mutate_duplicate_genome(action_string)
    if mutation_type <= 8:  # 10% - delete genome
        return mutate_delete_genome(action_string)
    # 20% - shuffle genome
    return mutate_scramble_genome(action_string)
def get_survivors(last_generation):
    """Return the trailing ``survivors_per_gen`` entries of ``last_generation``.

    The caller passes a list ordered by ascending score, so the tail holds
    the best-scoring genomes.
    """
    cutoff = -survivors_per_gen
    return last_generation[cutoff:]
def sort_generation(last_generation):
    """Order a scored generation by ascending score and report the leaders.

    ``last_generation`` is a list of ``(score, child)`` tuples. The best
    entries (up to five, highest score first) are printed, and the children
    are returned as a plain list sorted from lowest to highest score. Ties
    keep their input order because the sort is stable.

    Generations with fewer than five entries (including an empty one) are
    handled: only the entries that exist are printed.
    """
    sorted_gen = sorted(last_generation, key=lambda tup: tup[0])
    # One label per reported rank; zip() stops early for short generations.
    labels = [" :: top 5: {}"] + [" :: : {}"] * 4
    for label, entry in zip(labels, reversed(sorted_gen)):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(label.format(entry))
    return [child for _score, child in sorted_gen]
if __name__ == '__main__':
    # Generation zero: every individual is the unmutated seed genome.
    last_generation = [genisis] * children_per_gen

    env = gym.make('Qbert-v0')
    env.monitor.start('/tmp/atari-experiment-7', force=True)
    try:
        env.reset()
        for generation in range(generations):
            # Parents are the tail of the previous (score-sorted) generation.
            survivors = get_survivors(last_generation)
            this_generation = []
            for child in range(children_per_gen):
                # Cycle through the survivors so each parents an equal share.
                mutant = mutate(survivors[child % len(survivors)])
                env.reset()
                total_reward = 0
                number_of_actions = len(mutant)
                for step in range(max_time):
                    env.render()
                    # The genome is replayed cyclically, one digit per step.
                    next_action = int(mutant[step % number_of_actions])
                    _observation, reward, done, _info = env.step(next_action)
                    total_reward += reward
                    if done:
                        break
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print(" :: mutant {}: {} -- {}.".format(str(child).zfill(3), mutant, total_reward))
                this_generation.append((total_reward, mutant))
            last_generation = sort_generation(this_generation)
    finally:
        # Close the monitor even if an episode raises or the run is interrupted.
        env.monitor.close()
@whusym
Copy link

whusym commented Dec 11, 2017

Thank you so much! Based on your method, I created an even simpler one (using only crossover and single mutation) :)

https://github.com/whusym/gym_practice/blob/master/simpleGA.py

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment