Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Line Fitting With Pyevolve
### finding the best fit line for data with a random error ###
#
# a simplification of the example given at http://acodersmusings.blogspot.co.uk/2009/07/curve-fitting-with-pyevolve.html
#
# using a genetic algorithm to infer the parameters of a model - the simplest form possible, a straight line fit.
#
from pyevolve import G1DList, GSimpleGA, Selectors, Scaling, DBAdapters
import numpy
import matplotlib.pyplot as plot
#
# Create Random Data
#
N = 100
x = numpy.random.rand(N)
err = numpy.random.normal(0, 3, N) # some noise
# set the actual parameters
m = 10.0
b = 5.0
# a simple linear function
def linear(x, m, b):
y = m * x + b
return y
# create some noisy test data
yt = linear(x, m, b) + err
# wrap up the sample points
sample_points = [x, yt]
# create a generator function that tests the goodness of fit for each chromosome
def generate_obj_fcn(sample_points):
# the objective function just calculates the square error
def obj_fcn(chromosome):
""" calculate the squared error of this chromosomes' parameters
"""
# unpack the m (slope) + b (intercept) from the chromosome
m_p = chromosome[0] / 100.0 # hack to allow decimals
b_p = chromosome[1] / 100.0
# unpack the sample points
x = sample_points[0]
yt = sample_points[1]
# calculate the predicted y's
yp = linear(x, m_p, b_p)
# calculate the square error for each point
deltas = ( yp - yt ) ** 2
# calculate the score
# note the '-' the ga looks for the highest score
score = -sum(deltas)
return score
return obj_fcn
#
# Create the genome
#
genome = G1DList.G1DList(2) # set up a list with 2 points 1 for m and 1 for b
genome.evaluator.set(generate_obj_fcn(sample_points))
genome.setParams(rangemin = 0.0, rangemax = 20.0 * 100.0) # hack to allow decimals
#
# Set up the engine
#
ga = GSimpleGA.GSimpleGA(genome)
ga.setPopulationSize(1000) # set the population to 1000
# set a selector
ga.selector.set(Selectors.GRouletteWheel) # set a selector - there are many in the pyevolve package
#ga.selector.set(Selectors.GTournamentSelector) # doesn't seem to make much difference here
#
# Change the scaling method
#
pop = ga.getPopulation()
pop.scaleMethod.set(Scaling.SigmaTruncScaling) # don't get this...
#
# Run the ga
#
ga.evolve() # freq_stats=10)
# pick the m and the b
mg = ga.bestIndividual()[0] / 100.0 # back to decimals
bg = ga.bestIndividual()[1] / 100.0
#
# do a numpy least square fit - a few less lines
#
x_in = numpy.vstack([x, numpy.ones(len(x))]).T
mn, bn = numpy.linalg.lstsq(x_in, yt)[0]
print '\nPyevolve - m : ', mg, ' b: ', bg
print 'Numpy - m : ', mn, ' b: ', bn
#
# plot the two results
#
fig = plot.figure(figsize = (12, 6))
plot.scatter(x, yt, color = 'r')
plot.plot(x, linear(x, mn, bn), 'b--', label = 'numpy least sq')
plot.plot(x, linear(x, mg, bg), 'c--', label = 'ga least sq')
plot.title('Comparing Approaches')
plot.legend()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.