# Source: GitHub gist RileyLazarou/846e547725ea239c3edfcdcb99a9eb7a
# Author: @RileyLazarou -- created November 11, 2019 15:35
# Set up the environment and collect the observation space and action space
# sizes: one value per observation feature, one per discrete action.
env = gym.make("CartPole-v1")
observation_space = env.observation_space.shape[0]
action_space = env.action_space.n


# The function for creating the initial population
def organism_creator():
    """Create one new organism for the initial population.

    The list passed to ``Organism`` starts with the observation size and
    ends with the action count; presumably these are layer widths with
    three hidden layers of 16 in between -- confirm against ``Organism``.
    """
    return Organism(
        [observation_space, 16, 16, 16, action_space], output='softmax'
    )
def simulate_and_evaluate(organism, trials=1, *, environment=None):
    """
    Run the environment `trials` times, using the organism as the agent.

    Each trial starts from a fresh reset and runs until the environment
    reports that the episode is over; every step taken adds one to the
    fitness total.

    Args:
        organism: Agent exposing ``predict``. It is called with the state
            reshaped to a single row, and the index of the largest output
            is used as the action.
        trials: Number of episodes to run (must be at least 1).
        environment: Environment to run the episodes in. Defaults to the
            module-level ``env``.

    Returns:
        The average number of timesteps survived per trial.

    Raises:
        ValueError: If ``trials`` is less than 1.
    """
    if trials < 1:
        raise ValueError("trials must be at least 1")
    active_env = env if environment is None else environment

    total_steps = 0
    for _ in range(trials):
        state = active_env.reset()  # Get the initial state
        if isinstance(state, tuple):
            # Newer Gym/Gymnasium releases return (observation, info).
            state = state[0]
        while True:
            total_steps += 1
            scores = organism.predict(state.reshape((1, -1)))
            action = int(np.argmax(scores.flatten()))
            outcome = active_env.step(action)
            if len(outcome) == 5:
                # Newer API: (obs, reward, terminated, truncated, info).
                state, _, terminated, truncated, _ = outcome
                done = terminated or truncated
            else:
                # Classic API: (obs, reward, done, info).
                state, _, done, _ = outcome
            if done:  # break if the agent wins or loses
                break
    return total_steps / trials
# Create the scoring function and build the ecosystem
def scoring_function(organism):
    """Score an organism by its average survival time over five trials."""
    return simulate_and_evaluate(organism, trials=5)


ecosystem = Ecosystem(
    organism_creator,
    scoring_function,
    population_size=100,
    holdout=0.1,
    mating=True,
)
generations = 200
for i in range(generations):
    ecosystem.generation()
    # [Visualization code omitted]
    # NOTE(review): `this_generation_best` is not defined in the visible
    # code; presumably the omitted visualization code sets it, with the
    # best score at index 1 -- confirm.
    # Stop when an organism achieves a perfect score
    if this_generation_best[1] == 500:
        break
# End of gist excerpt (GitHub page footer omitted).