Reinforcement learning on multiple CPUs with a genetic algorithm, using PyGAD, PyTorch, OpenAI Gym (CartPole-v1) and multiprocessing.Pool.
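The idea: each candidate solution in the GA population is a flattened vector of network weights. To score a candidate, its weights are loaded into a small policy network, one CartPole episode is played greedily, and the episode's total reward becomes the fitness. Because each episode is independent of the others, pygad.GA.cal_pop_fitness is overridden so the per-solution evaluations run in parallel on a multiprocessing.Pool instead of one after another.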
import time
import gym
import numpy as np
import pygad.torchga
import pygad
import torch
import torch.nn as nn
from multiprocessing import Pool
def fitness_func(solution, sol_idx):
    global model, observation_space_size, env

    # Load the candidate's flattened weight vector into the network.
    model_weights_dict = pygad.torchga.model_weights_as_dict(model=model, weights_vector=solution)
    model.load_state_dict(model_weights_dict)

    # Play one episode greedily; the total reward is the fitness.
    observation = env.reset()
    sum_reward = 0
    done = False
    while (not done) and (sum_reward < 1000):  # cap the episode at 1000 reward
        # env.render()
        ob_tensor = torch.tensor(observation.copy(), dtype=torch.float)
        q_values = model(ob_tensor)
        action = torch.argmax(q_values).item()
        observation_next, reward, done, info = env.step(action)
        observation = observation_next
        sum_reward += reward

    return sum_reward
def callback_generation(ga_instance):
    print("Generation = {generation}".format(generation=ga_instance.generations_completed))
    print("Fitness    = {fitness}".format(fitness=ga_instance.best_solution()[1]))


def fitness_wrapper(solution):
    # Pool.map passes a single argument, so fix sol_idx here.
    return fitness_func(solution, 0)
class PooledGA(pygad.GA):
    # Override PyGAD's population-fitness step so each solution is
    # evaluated in a separate worker process.
    def cal_pop_fitness(self):
        global pool
        pop_fitness = pool.map(fitness_wrapper, self.population)
        print(pop_fitness)
        pop_fitness = np.array(pop_fitness)
        return pop_fitness
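# Note (assumption): this script relies on fork-style multiprocessing (the
# default on Linux), so the worker processes inherit the module-level `model`
# and `env` globals below. On platforms that spawn workers (Windows, and macOS
# by default), the code would additionally need an `if __name__ == "__main__":`
# guard plus a Pool initializer that rebuilds the model and environment in
# each worker.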
env = gym.make("CartPole-v1")
observation_space_size = env.observation_space.shape[0]
action_space_size = env.action_space.n

# No gradients are needed: the GA evolves the weights directly.
torch.set_grad_enabled(False)

model = nn.Sequential(
    nn.Linear(observation_space_size, 16),
    nn.ReLU(),
    nn.Linear(16, 16),
    nn.ReLU(),
    nn.Linear(16, action_space_size)
)
torch_ga = pygad.torchga.TorchGA(model=model, num_solutions=10)
# Prepare the PyGAD parameters. See the documentation for details: https://pygad.readthedocs.io/en/latest/README_pygad_ReadTheDocs.html#pygad-ga-class
num_generations = 50          # Number of generations.
num_parents_mating = 5        # Number of solutions selected as parents in the mating pool.
initial_population = torch_ga.population_weights  # Initial population of network weights.
parent_selection_type = "sss"     # Parent selection type (steady-state selection).
crossover_type = "single_point"   # Crossover operator.
mutation_type = "random"          # Mutation operator.
mutation_percent_genes = 10   # Percentage of genes to mutate. Ignored if mutation_num_genes is set.
keep_parents = -1             # Parents kept in the next population: -1 keeps all, 0 keeps none.
start_time = time.time()

# Single-process version, kept for comparison:
# ga_instance = pygad.GA(num_generations=num_generations,
#                        num_parents_mating=num_parents_mating,
#                        initial_population=initial_population,
#                        fitness_func=fitness_func,
#                        parent_selection_type=parent_selection_type,
#                        crossover_type=crossover_type,
#                        mutation_type=mutation_type,
#                        mutation_percent_genes=mutation_percent_genes,
#                        keep_parents=keep_parents,
#                        on_generation=callback_generation)
#
# ga_instance.run()
ga_instance = PooledGA(num_generations=num_generations,
                       num_parents_mating=num_parents_mating,
                       initial_population=initial_population,
                       fitness_func=fitness_func,
                       parent_selection_type=parent_selection_type,
                       crossover_type=crossover_type,
                       mutation_type=mutation_type,
                       mutation_percent_genes=mutation_percent_genes,
                       keep_parents=keep_parents,
                       on_generation=callback_generation)

with Pool(processes=10) as pool:
    ga_instance.run()
solution, solution_fitness, solution_idx = ga_instance.best_solution()
print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness))
print("Index of the best solution: {solution_idx}".format(solution_idx=solution_idx))
print("--- %s seconds ---" % (time.time() - start_time))

# Load the best evolved weights back into the network.
model_weights_dict = pygad.torchga.model_weights_as_dict(model=model, weights_vector=solution)
model.load_state_dict(model_weights_dict)
# Play one rendered episode with the best network.
observation = env.reset()
sum_reward = 0
done = False
while not done:
    env.render()
    ob_tensor = torch.tensor(observation.copy(), dtype=torch.float)
    q_values = model(ob_tensor)
    action = torch.argmax(q_values).item()
    observation_next, reward, done, info = env.step(action)
    observation = observation_next
    sum_reward += reward

print("Sum reward: " + str(sum_reward))
# After the generations complete, plot how the best fitness evolved over the generations.
ga_instance.plot_result(title="PyGAD & PyTorch - Iteration vs. Fitness", linewidth=4)
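A few version assumptions worth noting: the env.reset()/env.step() calls follow the pre-0.26 Gym API, where reset() returns only the observation and step() returns a 4-tuple; newer Gym/Gymnasium releases return (observation, info) from reset() and a 5-tuple from step() with separate terminated/truncated flags. Likewise, recent PyGAD versions rename plot_result() to plot_fitness().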