Reinforcement learning on multiple CPUs with a genetic algorithm, using PyGAD, PyTorch, OpenAI Gym (CartPole-v1) and multiprocessing.Pool.
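The idea: each candidate solution in the GA population is a flattened vector of network weights. To score a candidate, its weights are loaded into a small policy network, one CartPole episode is played greedily, and the episode's total reward becomes the fitness. Because each episode is independent of the others, pygad.GA.cal_pop_fitness is overridden so the per-solution evaluations run in parallel on a multiprocessing.Pool instead of one after another.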
import time
import gym
import numpy as np
import pygad.torchga
import pygad
import torch
import torch.nn as nn
from multiprocessing import Pool
def fitness_func(solution, sol_idx):
    global model, observation_space_size, env

    # Load the candidate's flattened weight vector into the network.
    model_weights_dict = pygad.torchga.model_weights_as_dict(model=model, weights_vector=solution)
    model.load_state_dict(model_weights_dict)

    # Play one episode greedily; the total reward is the fitness.
    observation = env.reset()
    sum_reward = 0
    done = False
    while (not done) and (sum_reward < 1000):  # cap the episode at 1000 reward
        # env.render()
        ob_tensor = torch.tensor(observation.copy(), dtype=torch.float)
        q_values = model(ob_tensor)
        action = torch.argmax(q_values).item()
        observation_next, reward, done, info = env.step(action)
        observation = observation_next
        sum_reward += reward

    return sum_reward
def callback_generation(ga_instance):
    print("Generation = {generation}".format(generation=ga_instance.generations_completed))
    print("Fitness    = {fitness}".format(fitness=ga_instance.best_solution()[1]))


def fitness_wrapper(solution):
    # Pool.map passes a single argument, so fix sol_idx here.
    return fitness_func(solution, 0)
class PooledGA(pygad.GA):
    # Override PyGAD's population-fitness step so each solution is
    # evaluated in a separate worker process.
    def cal_pop_fitness(self):
        global pool
        pop_fitness = pool.map(fitness_wrapper, self.population)
        print(pop_fitness)
        pop_fitness = np.array(pop_fitness)
        return pop_fitness
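# Note (assumption): this script relies on fork-style multiprocessing (the
# default on Linux), so the worker processes inherit the module-level `model`
# and `env` globals below. On platforms that spawn workers (Windows, and macOS
# by default), the code would additionally need an `if __name__ == "__main__":`
# guard plus a Pool initializer that rebuilds the model and environment in
# each worker.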
env = gym.make("CartPole-v1")
observation_space_size = env.observation_space.shape[0]
action_space_size = env.action_space.n

# No gradients are needed: the GA evolves the weights directly.
torch.set_grad_enabled(False)

model = nn.Sequential(
    nn.Linear(observation_space_size, 16),
    nn.ReLU(),
    nn.Linear(16, 16),
    nn.ReLU(),
    nn.Linear(16, action_space_size)
)
torch_ga = pygad.torchga.TorchGA(model=model, num_solutions=10)
# Prepare the PyGAD parameters. See the documentation for details: https://pygad.readthedocs.io/en/latest/README_pygad_ReadTheDocs.html#pygad-ga-class
num_generations = 50          # Number of generations.
num_parents_mating = 5        # Number of solutions selected as parents in the mating pool.
initial_population = torch_ga.population_weights  # Initial population of network weights.
parent_selection_type = "sss"     # Parent selection type (steady-state selection).
crossover_type = "single_point"   # Crossover operator.
mutation_type = "random"          # Mutation operator.
mutation_percent_genes = 10   # Percentage of genes to mutate. Ignored if mutation_num_genes is set.
keep_parents = -1             # Parents kept in the next population: -1 keeps all, 0 keeps none.
start_time = time.time()

# Single-process version, kept for comparison:
# ga_instance = pygad.GA(num_generations=num_generations,
#                        num_parents_mating=num_parents_mating,
#                        initial_population=initial_population,
#                        fitness_func=fitness_func,
#                        parent_selection_type=parent_selection_type,
#                        crossover_type=crossover_type,
#                        mutation_type=mutation_type,
#                        mutation_percent_genes=mutation_percent_genes,
#                        keep_parents=keep_parents,
#                        on_generation=callback_generation)
#
# ga_instance.run()
ga_instance = PooledGA(num_generations=num_generations,
                       num_parents_mating=num_parents_mating,
                       initial_population=initial_population,
                       fitness_func=fitness_func,
                       parent_selection_type=parent_selection_type,
                       crossover_type=crossover_type,
                       mutation_type=mutation_type,
                       mutation_percent_genes=mutation_percent_genes,
                       keep_parents=keep_parents,
                       on_generation=callback_generation)

with Pool(processes=10) as pool:
    ga_instance.run()
solution, solution_fitness, solution_idx = ga_instance.best_solution()
print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness))
print("Index of the best solution: {solution_idx}".format(solution_idx=solution_idx))
print("--- %s seconds ---" % (time.time() - start_time))

# Load the best evolved weights back into the network.
model_weights_dict = pygad.torchga.model_weights_as_dict(model=model, weights_vector=solution)
model.load_state_dict(model_weights_dict)
# Play one rendered episode with the best network.
observation = env.reset()
sum_reward = 0
done = False
while not done:
    env.render()
    ob_tensor = torch.tensor(observation.copy(), dtype=torch.float)
    q_values = model(ob_tensor)
    action = torch.argmax(q_values).item()
    observation_next, reward, done, info = env.step(action)
    observation = observation_next
    sum_reward += reward

print("Sum reward: " + str(sum_reward))
# After the generations complete, plot how the best fitness evolved over the generations.
ga_instance.plot_result(title="PyGAD & PyTorch - Iteration vs. Fitness", linewidth=4)
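A few version assumptions worth noting: the env.reset()/env.step() calls follow the pre-0.26 Gym API, where reset() returns only the observation and step() returns a 4-tuple; newer Gym/Gymnasium releases return (observation, info) from reset() and a 5-tuple from step() with separate terminated/truncated flags. Likewise, recent PyGAD versions rename plot_result() to plot_fitness().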