# foolnotion/deap-symreg.py

## deap-symreg.py
#    This file is part of EAP.
#
#    EAP is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Lesser General Public License as
#    published by the Free Software Foundation, either version 3 of
#    the License, or (at your option) any later version.
#
#    EAP is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#    GNU Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public
#    License along with EAP. If not, see <http://www.gnu.org/licenses/>.

import functools
import math
import multiprocessing
import operator
import random
import timeit
import warnings # suppress some warnings related to invalid values

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

from deap import algorithms
from deap import base
from deap import creator
from deap import gp
from deap import tools


def evalSymbReg(individual, pset, X_train, y_train):
    """Score a GP individual by the R^2 of its predictions on the training data.

    The expression tree is compiled with ``gp.compile`` and called once per
    row of ``X_train`` (each row is unpacked into positional arguments).
    NaN predictions are replaced by the midpoint of the finite prediction
    range before scoring. Any infinite prediction makes the minimum or the
    maximum non-finite, in which case the individual scores 0.

    Args:
        individual: expression tree to evaluate.
        pset: primitive set used to compile ``individual``.
        X_train: iterable of input rows.
        y_train: target values passed to ``r2_score`` as the ground truth.

    Returns:
        A one-element tuple ``(fitness,)``; the fitness is 0 whenever the
        predictions or the resulting score are not finite.
    """
    # Turn the expression tree into a plain Python callable.
    model = gp.compile(expr=individual, pset=pset)

    # Everything numeric happens with warnings silenced: invalid values are
    # expected for randomly generated expressions.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        predictions = np.array([model(*row) for row in X_train])
        lowest = np.nanmin(predictions)
        highest = np.nanmax(predictions)

        # Guard clause: an unbounded (or all-NaN) prediction range is worthless.
        if not (np.isfinite(lowest) and np.isfinite(highest)):
            return (0,)

        midpoint = (lowest + highest) / 2
        np.nan_to_num(predictions, copy=False, nan=midpoint, posinf=midpoint, neginf=midpoint)
        score = r2_score(y_train, predictions)

        return (score if np.isfinite(score) else 0,)

# Load the data set: every column except the last is an input feature,
# the last column is the regression target.
df = pd.read_csv('./data/Poly-10.csv', sep=',')
X = df.iloc[:,:-1].to_numpy()
y = df.iloc[:,-1].to_numpy()

# Split the rows 50/50 into training and test parts with a fixed random_state.
# Only X_train / y_train are used by the code visible in this file.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=1234)
_, cols = X_train.shape  # cols: number of input features (arity of the primitive set)

# primitive set: the operators and terminals available to the evolved expressions
pset = gp.PrimitiveSet("MAIN", cols)  # one input argument per feature column
pset.addPrimitive(np.add, 2, name="vadd")
pset.addPrimitive(np.subtract, 2, name="vsub")
pset.addPrimitive(np.multiply, 2, name="vmul")
# vdiv and vlog are the raw NumPy functions (no protected variants); the
# resulting NaN/inf values are dealt with in evalSymbReg.
pset.addPrimitive(np.divide, 2, name="vdiv")
pset.addPrimitive(np.negative, 1, name="vneg")
pset.addPrimitive(np.cos, 1, name="vcos")
pset.addPrimitive(np.sin, 1, name="vsin")
pset.addPrimitive(np.exp, 1, name="vexp")
pset.addPrimitive(np.log, 1, name="vlog")
# Random constant drawn uniformly from [-1, 1). A functools.partial is used
# instead of a lambda so the generating function can be pickled: individuals
# are shipped to worker processes by main(), and recent DEAP releases warn
# that lambda-backed ephemeral constants cannot be pickled.
pset.addEphemeralConstant("rand101", functools.partial(np.random.uniform, -1.0, 1.0))

# NOTE: despite its name, "FitnessMin" carries a positive weight, so the
# single objective (the R^2 returned by evalSymbReg) is maximised. The name is
# kept because it is part of the registered creator namespace.
creator.create("FitnessMin", base.Fitness, weights=(1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

# Static size limits applied to trees produced by crossover and mutation;
# maxHeight is also the maximum height of the initial trees.
maxHeight = 10
maxLength = 50

toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=maxHeight)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", evalSymbReg, pset=pset, X_train=X_train, y_train=y_train)
toolbox.register("select", tools.selTournament, tournsize=5)

# Decorators that enforce the limits above on tree height and node count.
limitHeight = gp.staticLimit(operator.attrgetter('height'), maxHeight)
limitLength = gp.staticLimit(len, maxLength)

# Mutation operators that mutOperator chooses from.
mutOperators = [ gp.mutUniform ]

def mutOperator(*args, **kwargs):
    """Apply a randomly chosen mutation operator.

    One entry of the module-level ``mutOperators`` list is drawn with
    ``np.random.choice`` and called with exactly the arguments given here;
    its return value is passed through unchanged.
    """
    return np.random.choice(mutOperators)(*args, **kwargs)


# Crossover operator; both static limits (height, then length) are wrapped
# around it. A decorator can only be applied after the alias is registered.
toolbox.register("mate", gp.cxOnePoint)
toolbox.decorate("mate", limitHeight)
toolbox.decorate("mate", limitLength)
# Mutation: expr_mut generates the replacement subtrees (genFull, depth 0-2);
# the operator itself is picked by mutOperator. The same limits are applied.
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register('mutate', mutOperator, expr=toolbox.expr_mut, pset=pset)
toolbox.decorate("mutate", limitHeight)
toolbox.decorate("mutate", limitLength)


def main():
    """Run one evolutionary search and return its final state.

    Fitness evaluations are distributed over a ``multiprocessing.Pool`` that
    is registered as ``toolbox.map`` for the duration of the run. The pool is
    shut down and the built-in ``map`` is registered again before returning,
    so no worker processes are left behind.

    Returns:
        tuple: ``(pop, stats, hof)`` -- the population list that was handed
        to ``algorithms.eaSimple``, the ``tools.Statistics`` object configured
        here, and the ``tools.HallOfFame`` of size 1. The return value of
        ``eaSimple`` itself is not captured.
    """
    np.seterr(all='ignore')
    # Seed both generators: the ephemeral constants and mutOperator draw from
    # np.random, so seeding `random` alone does not make a run repeatable.
    random.seed(318)
    np.random.seed(318)

    with multiprocessing.Pool() as pool:
        toolbox.register("map", pool.map)
        try:
            pop = toolbox.population(n=1000)
            hof = tools.HallOfFame(1)
            stats = tools.Statistics(lambda ind: ind.fitness.values)
            # nan-aware reducers, so a NaN fitness cannot poison the summary
            stats.register("avg", np.nanmean)
            stats.register("std", np.nanstd)
            stats.register("min", np.nanmin)
            stats.register("max", np.nanmax)

            algorithms.eaSimple(pop, toolbox, cxpb=1, mutpb=0.25, ngen=100, stats=stats, halloffame=hof)
        finally:
            # Point toolbox.map back at the built-in map so it never refers
            # to the pool once that has been terminated.
            toolbox.register("map", map)

    return pop, stats, hof

if __name__ == "__main__":
    # Execute a single run and print the wall-clock time reported by timeit.
    # The guard also keeps worker processes that re-import this module from
    # starting a run of their own.
    print(timeit.timeit(stmt=main, number=1))