PuZZleDucK/atari-7-genetic.py

## atari-7-genetic.py
#!/usr/bin/python
# Simple Genetic Algorithm Atari simulation

import gym
import numpy as np
import sys
from pybrain.tools.shortcuts import buildNetwork
import random

# Seed genome copied into every slot of the initial population.  Each
# character is one action: the main loop converts it with int() and passes it
# to env.step(), cycling through the string for the length of an episode.
genisis = "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
# Number of mutants created and evaluated in every generation.
children_per_gen = 200
# How many genomes get_survivors() keeps from the end of the score-sorted list.
survivors_per_gen = 10
# Number of generations the main loop runs.
generations = 100
# Upper bound on environment steps per mutant; an episode also stops early
# when env.step() reports done.
max_time = 15000

def mutate_single_gene(action_string):
    """Return a copy of ``action_string`` with one character overwritten.

    A replacement action digit in 0-5 is drawn first, then the index to
    overwrite, so the result always has the same length as the input.
    An empty input raises ``IndexError`` from ``random.choice``.
    """
    replacement = str(random.randrange(6))
    index = random.choice(range(len(action_string)))
    head = action_string[:index]
    tail = action_string[index + 1:]
    return head + replacement + tail

def mutate_add_genome(action_string):
    """Return ``action_string`` with five new random actions spliced in.

    Five digits in 0-5 are drawn first, then an insertion index in
    ``0 .. len(action_string) - 1``; the new segment goes in front of the
    character currently at that index, so it is never appended after the
    last character.  An empty input raises ``IndexError`` from
    ``random.choice``.
    """
    fresh_segment = "".join(str(random.randrange(6)) for _ in range(5))
    cut = random.choice(range(len(action_string)))
    return action_string[:cut] + fresh_segment + action_string[cut:]

def mutate_duplicate_genome(action_string):
    """Return ``action_string`` with one of its own segments copied into it.

    The source index is drawn first, then the insertion index.  Up to five
    characters starting at the source index are copied (fewer when the
    source index is within four characters of the end) and inserted in
    front of the character at the insertion index.  An empty input raises
    ``IndexError`` from ``random.choice``.
    """
    positions = range(len(action_string))
    src = random.choice(positions)
    dst = random.choice(positions)
    copied = action_string[src:src + 5]
    return "".join((action_string[:dst], copied, action_string[dst:]))

def mutate_delete_genome(action_string):
    """Return ``action_string`` with up to five consecutive actions removed.

    A start index is drawn uniformly from the string and the characters
    ``[start, start + 5)`` are dropped (fewer when the start is near the
    end).

    The result is never empty: if the deletion would remove every action
    (start index 0 on a genome of five or fewer characters) the genome is
    returned unchanged.  An empty genome cannot be mutated by the other
    operators (``random.choice`` on an empty range raises ``IndexError``)
    and cannot be replayed, because the caller indexes it modulo its
    length.  An empty input is likewise returned as-is instead of raising.
    """
    if not action_string:
        return action_string
    delete_location = random.choice(range(len(action_string)))
    shortened = action_string[:delete_location] + action_string[delete_location + 5:]
    if not shortened:
        # The whole genome fell inside the deleted window; keep the original.
        return action_string
    return shortened

def mutate_scramble_genome(action_string):
    """Return ``action_string`` with one window of up to five actions shuffled.

    A start index is drawn, the window ``[start, start + 5)`` is cut out
    (shorter near the end of the string), its characters are reordered with
    ``random.sample`` and the window is put back in place.  Length and
    character counts are unchanged.  An empty input raises ``IndexError``
    from ``random.choice``.
    """
    start = random.choice(range(len(action_string)))
    stop = start + 5
    window = action_string[start:stop]
    shuffled = "".join(random.sample(window, len(window)))
    return action_string[:start] + shuffled + action_string[stop:]

def mutate(action_string):
    """Apply one randomly chosen mutation operator to ``action_string``.

    A roll in 1..10 selects the operator:

    * 1      (10%) - return the genome unchanged
    * 2-4    (30%) - ``mutate_single_gene``
    * 5-6    (20%) - ``mutate_add_genome``
    * 7      (10%) - ``mutate_duplicate_genome``
    * 8      (10%) - ``mutate_delete_genome``
    * 9-10   (20%) - ``mutate_scramble_genome``

    The thresholds below were written for a roll of 1..10, but the roll was
    previously drawn from 0..9.  That made "no mutation" fire 20% of the
    time and scramble only 10%, the reverse of the documented split.

    Returns the (possibly unchanged) genome string.
    """
    mutation_type = random.randint(1, 10)  # inclusive on both ends
    if mutation_type <= 1:  # 10% - no mutation
        return action_string
    elif mutation_type <= 4:  # 30% - single gene mutation
        return mutate_single_gene(action_string)
    elif mutation_type <= 6:  # 20% - add random genome
        return mutate_add_genome(action_string)
    elif mutation_type <= 7:  # 10% - replicate genome
        return mutate_duplicate_genome(action_string)
    elif mutation_type <= 8:  # 10% - delete genome
        return mutate_delete_genome(action_string)
    else:  # 20% - shuffle genome
        return mutate_scramble_genome(action_string)

def get_survivors(last_generation):
    """Return the trailing ``survivors_per_gen`` entries of ``last_generation``.

    The caller passes a list ordered by ascending score, so the tail of the
    list holds the best-scoring genomes.
    """
    keep = survivors_per_gen
    return last_generation[-keep:]

def sort_generation(last_generation):
    """Order a scored generation from worst to best and report the leaders.

    Args:
        last_generation: iterable of ``(score, genome)`` tuples.

    Returns:
        A list of the genomes only, sorted by ascending score.  The sort is
        stable, so genomes with equal scores keep their input order.

    Prints up to five of the highest-scoring ``(score, genome)`` tuples,
    best first.  Fewer lines are printed when the generation has fewer than
    five members; fixed negative indexes previously raised ``IndexError``
    in that case.
    """
    sorted_gen = sorted(last_generation, key=lambda tup: tup[0])
    # Single-argument print(...) emits the same text under the Python 2
    # print statement and the Python 3 print function.
    for rank, entry in enumerate(reversed(sorted_gen[-5:])):
        if rank == 0:
            print("  ::    top 5: {}".format(entry))
        else:
            print("  ::         : {}".format(entry))
    return [child for _score, child in sorted_gen]

if __name__ == '__main__':
    # Generation zero: every slot holds the seed genome.
    last_generation = [genisis] * children_per_gen
    env = gym.make('Qbert-v0')
    env.monitor.start('/tmp/atari-experiment-7', force=True)
    try:
        env.reset()

        for generation in range(generations):
            survivors = get_survivors(last_generation)
            this_generation = []
            for child in range(children_per_gen):
                # Parents are taken round-robin from the survivor list.
                parent = survivors[child % len(survivors)]
                mutant = mutate(parent)
                if not mutant:
                    # A deletion can consume a very short genome entirely.
                    # An empty genome has no action to replay and would make
                    # the modulo below divide by zero, so fall back to the
                    # unmutated parent.
                    mutant = parent
                env.reset()
                total_reward = 0
                number_of_actions = len(mutant)
                for step in range(max_time):
                    env.render()
                    # Replay the genome cyclically until the episode ends.
                    next_action = int(mutant[step % number_of_actions])
                    _observation, reward, done, _info = env.step(next_action)
                    total_reward = total_reward + reward
                    if done:
                        break
                # Single-argument print(...) behaves the same under the
                # Python 2 print statement and the Python 3 print function.
                print("  ::  mutant {}: {} -- {}.".format(str(child).zfill(3), mutant, total_reward))
                this_generation.append((total_reward, mutant))
            last_generation = sort_generation(this_generation)
    finally:
        # Close the monitor even when the run is interrupted or raises.
        env.monitor.close()
	#!/usr/bin/python
	# Simple Genetic Algorithm Atari simulation

	import gym
	import numpy as np
	import sys
	from pybrain.tools.shortcuts import buildNetwork
	import random

	# Seed genome copied into every slot of the initial population.  Each
	# character is one action: the main loop converts it with int() and passes
	# it to env.step(), cycling through the string for the length of an episode.
	genisis = "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
	# Number of mutants created and evaluated in every generation.
	children_per_gen = 200
	# How many genomes get_survivors() keeps from the end of the score-sorted list.
	survivors_per_gen = 10
	# Number of generations the main loop runs.
	generations = 100
	# Upper bound on environment steps per mutant; an episode also stops early
	# when env.step() reports done.
	max_time = 15000

	def mutate_single_gene(action_string):
	    """Return a copy of ``action_string`` with one character overwritten.

	    A replacement action digit in 0-5 is drawn first, then the index to
	    overwrite, so the result always has the same length as the input.
	    An empty input raises ``IndexError`` from ``random.choice``.

	    The body is re-indented under the ``def``; the flattened original
	    could not be parsed.
	    """
	    replacement = str(random.randrange(6))
	    index = random.choice(range(len(action_string)))
	    head = action_string[:index]
	    tail = action_string[index + 1:]
	    return head + replacement + tail

	def mutate_add_genome(action_string):
	    """Return ``action_string`` with five new random actions spliced in.

	    Five digits in 0-5 are drawn first, then an insertion index in
	    ``0 .. len(action_string) - 1``; the new segment goes in front of the
	    character currently at that index.  An empty input raises
	    ``IndexError`` from ``random.choice``.

	    The body is re-indented under the ``def``; the flattened original
	    could not be parsed.
	    """
	    fresh_segment = "".join(str(random.randrange(6)) for _ in range(5))
	    cut = random.choice(range(len(action_string)))
	    return action_string[:cut] + fresh_segment + action_string[cut:]

	def mutate_duplicate_genome(action_string):
	    """Return ``action_string`` with one of its own segments copied into it.

	    The source index is drawn first, then the insertion index.  Up to five
	    characters starting at the source index are copied and inserted in
	    front of the character at the insertion index.  An empty input raises
	    ``IndexError`` from ``random.choice``.

	    The body is re-indented under the ``def``; the flattened original
	    could not be parsed.
	    """
	    positions = range(len(action_string))
	    src = random.choice(positions)
	    dst = random.choice(positions)
	    copied = action_string[src:src + 5]
	    return "".join((action_string[:dst], copied, action_string[dst:]))

	def mutate_delete_genome(action_string):
	    """Return ``action_string`` with up to five consecutive actions removed.

	    A start index is drawn uniformly from the string and the characters
	    ``[start, start + 5)`` are dropped (fewer when the start is near the
	    end).

	    The result is never empty: if the deletion would remove every action
	    the genome is returned unchanged, because an empty genome can be
	    neither mutated further nor replayed modulo its length.  An empty
	    input is likewise returned as-is instead of raising.

	    The body is re-indented under the ``def``; the flattened original
	    could not be parsed.
	    """
	    if not action_string:
	        return action_string
	    delete_location = random.choice(range(len(action_string)))
	    shortened = action_string[:delete_location] + action_string[delete_location + 5:]
	    if not shortened:
	        # The whole genome fell inside the deleted window; keep the original.
	        return action_string
	    return shortened

	def mutate_scramble_genome(action_string):
	    """Return ``action_string`` with one window of up to five actions shuffled.

	    A start index is drawn, the window ``[start, start + 5)`` is cut out,
	    its characters are reordered with ``random.sample`` and the window is
	    put back in place.  Length and character counts are unchanged.  An
	    empty input raises ``IndexError`` from ``random.choice``.

	    The body is re-indented under the ``def``; the flattened original
	    could not be parsed.
	    """
	    start = random.choice(range(len(action_string)))
	    stop = start + 5
	    window = action_string[start:stop]
	    shuffled = "".join(random.sample(window, len(window)))
	    return action_string[:start] + shuffled + action_string[stop:]

	def mutate(action_string):
	    """Apply one randomly chosen mutation operator to ``action_string``.

	    A roll in 1..10 selects the operator:

	    * 1      (10%) - return the genome unchanged
	    * 2-4    (30%) - ``mutate_single_gene``
	    * 5-6    (20%) - ``mutate_add_genome``
	    * 7      (10%) - ``mutate_duplicate_genome``
	    * 8      (10%) - ``mutate_delete_genome``
	    * 9-10   (20%) - ``mutate_scramble_genome``

	    The thresholds below were written for a roll of 1..10, but the roll
	    was previously drawn from 0..9, which swapped the "no mutation" and
	    scramble probabilities.  The branches are also re-indented; the
	    flattened original could not be parsed.

	    Returns the (possibly unchanged) genome string.
	    """
	    mutation_type = random.randint(1, 10)  # inclusive on both ends
	    if mutation_type <= 1:  # 10% - no mutation
	        return action_string
	    elif mutation_type <= 4:  # 30% - single gene mutation
	        return mutate_single_gene(action_string)
	    elif mutation_type <= 6:  # 20% - add random genome
	        return mutate_add_genome(action_string)
	    elif mutation_type <= 7:  # 10% - replicate genome
	        return mutate_duplicate_genome(action_string)
	    elif mutation_type <= 8:  # 10% - delete genome
	        return mutate_delete_genome(action_string)
	    else:  # 20% - shuffle genome
	        return mutate_scramble_genome(action_string)

	def get_survivors(last_generation):
	    """Return the trailing ``survivors_per_gen`` entries of ``last_generation``.

	    The caller passes a list ordered by ascending score, so the tail of
	    the list holds the best-scoring genomes.

	    The body is re-indented under the ``def``; the flattened original
	    could not be parsed.
	    """
	    keep = survivors_per_gen
	    return last_generation[-keep:]

	def sort_generation(last_generation):
	    """Order a scored generation from worst to best and report the leaders.

	    Args:
	        last_generation: iterable of ``(score, genome)`` tuples.

	    Returns:
	        A list of the genomes only, sorted by ascending score.  The sort
	        is stable, so genomes with equal scores keep their input order.

	    Prints up to five of the highest-scoring ``(score, genome)`` tuples,
	    best first.  Fewer lines are printed when the generation has fewer
	    than five members; fixed negative indexes previously raised
	    ``IndexError`` in that case.  The body is also re-indented; the
	    flattened original could not be parsed.
	    """
	    sorted_gen = sorted(last_generation, key=lambda tup: tup[0])
	    # Single-argument print(...) emits the same text under the Python 2
	    # print statement and the Python 3 print function.
	    for rank, entry in enumerate(reversed(sorted_gen[-5:])):
	        if rank == 0:
	            print(" :: top 5: {}".format(entry))
	        else:
	            print(" :: : {}".format(entry))
	    return [child for _score, child in sorted_gen]

	if __name__ == '__main__':
	    # The nesting of this script is restored here; the flattened original
	    # could not be parsed.
	    # Generation zero: every slot holds the seed genome.
	    last_generation = [genisis] * children_per_gen
	    env = gym.make('Qbert-v0')
	    env.monitor.start('/tmp/atari-experiment-7', force=True)
	    try:
	        env.reset()

	        for generation in range(generations):
	            survivors = get_survivors(last_generation)
	            this_generation = []
	            for child in range(children_per_gen):
	                # Parents are taken round-robin from the survivor list.
	                parent = survivors[child % len(survivors)]
	                mutant = mutate(parent)
	                if not mutant:
	                    # An empty genome has no action to replay and would
	                    # make the modulo below divide by zero, so fall back
	                    # to the unmutated parent.
	                    mutant = parent
	                env.reset()
	                total_reward = 0
	                number_of_actions = len(mutant)
	                for step in range(max_time):
	                    env.render()
	                    # Replay the genome cyclically until the episode ends.
	                    next_action = int(mutant[step % number_of_actions])
	                    _observation, reward, done, _info = env.step(next_action)
	                    total_reward = total_reward + reward
	                    if done:
	                        break
	                # Single-argument print(...) behaves the same under the
	                # Python 2 print statement and the Python 3 print function.
	                print(" :: mutant {}: {} -- {}.".format(str(child).zfill(3), mutant, total_reward))
	                this_generation.append((total_reward, mutant))
	            last_generation = sort_generation(this_generation)
	    finally:
	        # Close the monitor even when the run is interrupted or raises.
	        env.monitor.close()