Skip to content

Instantly share code, notes, and snippets.

@DMTSource
Last active April 19, 2023 10:00
Show Gist options
  • Save DMTSource/b80f1afb854f688dcccc4d60b18a721f to your computer and use it in GitHub Desktop.
Save DMTSource/b80f1afb854f688dcccc4d60b18a721f to your computer and use it in GitHub Desktop.
Example of DEAP symbolic regression (GP) parallelized with Ray, scaling from a local machine to a cluster and sharing large data through memory-efficient shared objects
# This file is part of EAP.
#
# EAP is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
#
# EAP is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with EAP. If not, see <http://www.gnu.org/licenses/>.
import operator
import math
import random
import numpy as np
import pandas as pd
from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp
########## !!!!!!!!!!!!!!
import ray
# Initialise Ray with its default settings as soon as the module is loaded,
# before the ray.put() / @ray.remote uses further down in this file.
ray.init()  # optionally pass e.g. num_cpus=1 here
# Define new functions
def protectedDiv(left, right):
    """Divide ``left`` by ``right`` element-wise, mapping non-finite results to 1.

    Evolved expressions routinely divide by zero; instead of propagating
    ``inf``/``nan`` through the fitness computation, every such result is
    replaced by 1.

    Args:
        left: Numerator, a scalar or a NumPy array.
        right: Denominator, a scalar or a NumPy array.

    Returns:
        The quotient as computed by ``np.divide``. For array results the
        non-finite entries are set to 1 in the freshly computed array; for
        scalar results a non-finite quotient is replaced by the integer 1.
    """
    # Division by zero is expected here, so silence NumPy's warnings for it.
    with np.errstate(divide='ignore', invalid='ignore'):
        x = np.divide(left, right)
    if isinstance(x, np.ndarray):
        # One mask covers both +/-inf and nan.
        x[~np.isfinite(x)] = 1
    elif not np.isfinite(x):
        x = 1
    return x
def _rand101():
    """Generate the ephemeral constant: a random integer in {-1, 0, 1}."""
    return random.randint(-1, 1)


# Primitive set for expressions of a single input variable, built from
# vectorised NumPy operations so one call evaluates all samples at once.
pset = gp.PrimitiveSet("MAIN", 1)
pset.addPrimitive(np.add, 2, name="vadd")
pset.addPrimitive(np.subtract, 2, name="vsub")
pset.addPrimitive(np.multiply, 2, name="vmul")
pset.addPrimitive(protectedDiv, 2)
pset.addPrimitive(np.negative, 1, name="vneg")
pset.addPrimitive(np.cos, 1, name="vcos")
pset.addPrimitive(np.sin, 1, name="vsin")
# A named module-level generator is used instead of a lambda: newer DEAP
# releases warn that lambda-backed ephemeral constants cannot be pickled.
pset.addEphemeralConstant("rand101", _rand101)
pset.renameArguments(ARG0='x')

# Single-objective minimisation; individuals are GP trees carrying that fitness.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
# Each registration below reuses the alias registered just before it,
# so the order of these calls matters.
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)
# Regression target sampled on [-1, 1]: 250 + x**4 + x**3 + x**2 + x.
samples = np.linspace(-1, 1, 1000000)
values = 250. + (samples**4 + samples**3 + samples**2 + samples)
# Deliberately large DataFrame: it stands in for an expensive dataset so the
# benefit of sharing one copy through Ray (vs. duplicating global scope on
# every worker) is visible.
# Lowercase 's' is the seconds alias; the uppercase 'S' form is deprecated
# in recent pandas releases.
df_data = pd.DataFrame(
    index=pd.date_range(end='1/1/2020', periods=len(samples), freq='5s'),
    data=np.vstack([samples, values]).T,
    columns=['samples', 'f'],
)
########## !!!!!!!!!!!!!!
# Hand the DataFrame to Ray once and keep the returned reference; the remote
# evaluation function resolves it with ray.get() rather than being passed the
# DataFrame itself.
df_data_obj_id = ray.put(df_data)
########## !!!!!!!!!!!!!!
@ray.remote
def evalSymbReg(func):
    """Score one compiled expression against the shared dataset (Ray task).

    Args:
        func: Callable already compiled from a GP individual by the caller
            (see ``ray_mapper``); it is applied to the whole ``samples``
            column in a single vectorised call.

    Returns:
        A one-element tuple holding the sum of squared differences between
        ``func(samples)`` and the target column ``f``. DEAP expects fitness
        values as a tuple, hence the wrapping.
    """
    # Resolve the shared DataFrame through its Ray object reference.
    data = ray.get(df_data_obj_id)
    # Sum of squared errors against the real target values,
    # i.e. 250 + x**4 + x**3 + x**2 + x.
    residual = func(data.samples.values) - data.f.values
    return (np.sum(residual ** 2),)
########## !!!!!!!!!!!!!!
def ray_mapper(f, individuals):
    """Evaluate ``individuals`` in parallel through Ray; drop-in for ``map``.

    Compilation happens here, in the driver process, because it needs the
    module-level ``toolbox``; only the resulting callables are submitted to
    the remote function.

    Args:
        f: Object exposing ``.remote(callable)``, i.e. the registered remote
            evaluation function.
        individuals: Iterable of GP individuals to evaluate.

    Returns:
        List of fitness results, in the same order as ``individuals``.
    """
    # Turn each tree expression into a callable function.
    compiled = [toolbox.compile(expr=ind) for ind in individuals]
    # Submit every evaluation first, then block once for all the results.
    pending = [f.remote(fn) for fn in compiled]
    return ray.get(pending)
# Evaluation is the @ray.remote function defined above.
toolbox.register("evaluate", evalSymbReg)
# Selection by tournaments of size 3.
toolbox.register("select", tools.selTournament, tournsize=3)
# One-point crossover between two trees.
toolbox.register("mate", gp.cxOnePoint)
# "expr_mut" must be registered before "mutate", which reads toolbox.expr_mut
# at registration time.
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register('mutate', gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
# Replace the toolbox's "map" with the Ray-backed mapper.
toolbox.register("map", ray_mapper)
def main(seed=318, pop_size=300, cxpb=0.5, mutpb=0.1, ngen=40):
    """Run the symbolic-regression evolution with ``algorithms.eaSimple``.

    The defaults reproduce the original hard-coded configuration.

    Args:
        seed: Seed for the ``random`` module.
        pop_size: Number of individuals in the initial population.
        cxpb: Crossover probability passed to ``eaSimple``.
        mutpb: Mutation probability passed to ``eaSimple``.
        ngen: Number of generations to run.

    Returns:
        Tuple ``(pop, stats, hof)``: the population list that was evolved,
        the ``tools.Statistics`` object, and the hall of fame keeping the
        single best individual.
    """
    random.seed(seed)

    pop = toolbox.population(n=pop_size)
    hof = tools.HallOfFame(1)

    # Per-generation statistics computed over the individuals' fitness values.
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)

    algorithms.eaSimple(pop, toolbox, cxpb, mutpb, ngen, stats=stats,
                        halloffame=hof)
    return pop, stats, hof


if __name__ == "__main__":
    main()
@DMTSource
Copy link
Author

Example output:

2020-03-01 23:56:23,337	INFO resource_spec.py:205 -- Starting Ray with 2.93 GiB memory available for workers and up to 1.47 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
gen	nevals	avg      	std        	min      	max        
0  	300   	1.977e+11	7.41659e+11	6.177e+10	9.93234e+12
1  	146   	7.10191e+10	1.42282e+11	6.17693e+10	2.52966e+12
2  	169   	1.03511e+11	3.15897e+11	6.17693e+10	2.53017e+12
3  	167   	9.66034e+10	2.83889e+11	6.17693e+10	2.53017e+12
4  	153   	1.40714e+11	4.2727e+11 	6.12739e+10	2.53017e+12
5  	157   	9.49998e+10	5.6888e+11 	6.12739e+10	9.93185e+12
6  	175   	7.88205e+10	1.58639e+11	6.12739e+10	2.52966e+12
7  	168   	6.33721e+10	2.39013e+10	6.12732e+10	4.76584e+11
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment