## Evolutionary resolution of lunar lander v2
import gym
from gym import wrappers
import numpy as np
import concurrent.futures
import logging
import threading

gym.undo_logger_setup()

EXPERIMENTPATH = '/tmp/LunarLander-experiment-1'

main_env = gym.make('LunarLander-v2')
main_env = wrappers.Monitor(main_env, EXPERIMENTPATH, force=True)

print(main_env.observation_space.shape[0])  # 8 state values
print(main_env.action_space.n)              # 4 discrete actions

MAX_EPISODES = 1000
MAX_STEPS = 250
DO_NOTHING_ACTION = 0
POPULATION = 100
LEARNING_RATE_EXPLORING = 0.0002  # learning rate while exploring
LEARNING_RATE_MATURE = 0.0002     # learning rate once an episode scores above 200
SIGMA = 0.1                       # standard deviation of the weight perturbations

# one environment per mutation so each episode can be evaluated independently
mutation_environments = []
for i in range(POPULATION):
    mutation_environments.append(gym.make('LunarLander-v2'))
class EvolutionaryNetWork:
    def __init__(self, sigma=0.01, state_size=8,
                 action_size=4, population_size=100):
        self.population_size = population_size
        self.action_size = action_size
        self.state_size = state_size
        self.sigma = sigma
        # linear policy: one weight per (state value, action) pair
        self.weight = np.random.rand(state_size, action_size)

    def generate_mutations(self):
        # sample one Gaussian perturbation of the weights per population member
        mutations = []
        noise = np.random.randn(self.population_size, self.state_size, self.action_size)
        for i in range(self.population_size):
            mutations.append(self.weight + self.sigma * noise[i])
        np_mutations = np.array(mutations)
        return np_mutations.reshape(self.population_size, self.state_size, self.action_size), noise

    def update_genes(self, total_rewards, noise, learning_rate):
        # move the weights along the reward-weighted sum of the noise samples
        weighted_noise = np.matmul(noise.T, total_rewards).T
        self.weight = self.weight + learning_rate / (self.population_size * self.sigma) * weighted_noise
def run_episode(weight, env, show=False):
    state = env.reset()
    total_reward = 0
    done = False
    step = 0
    while not done:
        if show:
            env.render()
        if step < MAX_STEPS:
            # pick the action with the highest score under the linear policy
            action = np.matmul(weight.T, state)
            move = np.argmax(action)
        else:
            # past the step limit, stop firing the engines
            move = DO_NOTHING_ACTION
        state, reward, done, _ = env.step(move)
        step += 1
        total_reward += reward
    return total_reward
genes = EvolutionaryNetWork(population_size=POPULATION, sigma=SIGMA)

# run episodes
for ep in range(MAX_EPISODES):
    show = False
    if ep % 100 == 0:
        show = True

    # run an episode with the current genes
    current_gen_eval = run_episode(genes.weight, main_env, show)

    mutations, noise = genes.generate_mutations()

    # run the mutations in parallel: submit all episodes first, then collect the results
    total_rewards = np.zeros(POPULATION)
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(run_episode, mutations[i], mutation_environments[i], False)
                   for i in range(POPULATION)]
        for i, future in enumerate(futures):
            total_rewards[i] = future.result()

    # select the learning rate
    learning_rate = LEARNING_RATE_EXPLORING
    if current_gen_eval > 200:
        learning_rate = LEARNING_RATE_MATURE

    # update genes
    genes.update_genes(total_rewards, noise, learning_rate)

    gen_mean = np.mean(total_rewards)
    if ep % 1 == 0:
        # print(genes.weight)
        print(ep, ': ', current_gen_eval, ' ', gen_mean)

main_env.close()
for i in range(POPULATION):
    mutation_environments[i].close()
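
A note on the update step: update_genes shifts the weights by the reward-weighted sum of the noise samples, scaled by learning_rate / (POPULATION * SIGMA), i.e. roughly w ← w + α/(N·σ) · Σᵢ rᵢ·εᵢ, where N is POPULATION, σ is SIGMA, rᵢ is the total reward earned by mutation i and εᵢ is its noise sample. This is the usual evolution-strategies gradient estimate, here applied to the raw episode rewards rather than normalized ones.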
@zannos commented Jun 3, 2017

Hi Pablo!
I am trying to implement an evolutionary learning strategy to solve Lunar Lander. I see you have done something similar. Can you comment on what state_size and population_size are?
Thanks!

@pablocastilla (Author)

Hi!

State size is the size of the state the environment gives you at each step (position, speed, etc.). In this case it has 8 values.

Population size is the number of mutations evaluated at each step.
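
For reference, both values can be read directly from the environment, just as the gist does at the top; a minimal check (assuming the same gym version used above):

import gym

env = gym.make('LunarLander-v2')
print(env.observation_space.shape[0])  # 8: length of each state vector (state_size)
print(env.action_space.n)              # 4: number of discrete actions
env.close()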

Any time!
