Skip to content

Instantly share code, notes, and snippets.

@treviesweets
Created July 18, 2014 13:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save treviesweets/dfde205260c75d10f16b to your computer and use it in GitHub Desktop.
Save treviesweets/dfde205260c75d10f16b to your computer and use it in GitHub Desktop.
Line Fitting With Pyevolve
### finding the best fit line for data with a random error ###
#
# a simplification of the example given at http://acodersmusings.blogspot.co.uk/2009/07/curve-fitting-with-pyevolve.html
#
# using a genetic algorithm to infer the parameters of a model - the simplest form possible, a straight line fit.
#
from pyevolve import G1DList, GSimpleGA, Selectors, Scaling, DBAdapters
import numpy
import matplotlib.pyplot as plot
#
# Create Random Data
#
N = 100
x = numpy.random.rand(N)
err = numpy.random.normal(0, 3, N) # some noise
# set the actual parameters
m = 10.0
b = 5.0
# a simple linear function
def linear(x, m, b):
y = m * x + b
return y
# create some noisy test data
yt = linear(x, m, b) + err
# wrap up the sample points
sample_points = [x, yt]
# create a generator function that tests the goodness of fit for each chromosome
def generate_obj_fcn(sample_points):
# the objective function just calculates the square error
def obj_fcn(chromosome):
""" calculate the squared error of this chromosomes' parameters
"""
# unpack the m (slope) + b (intercept) from the chromosome
m_p = chromosome[0] / 100.0 # hack to allow decimals
b_p = chromosome[1] / 100.0
# unpack the sample points
x = sample_points[0]
yt = sample_points[1]
# calculate the predicted y's
yp = linear(x, m_p, b_p)
# calculate the square error for each point
deltas = ( yp - yt ) ** 2
# calculate the score
# note the '-' the ga looks for the highest score
score = -sum(deltas)
return score
return obj_fcn
#
# Create the genome
#
genome = G1DList.G1DList(2) # set up a list with 2 points 1 for m and 1 for b
genome.evaluator.set(generate_obj_fcn(sample_points))
genome.setParams(rangemin = 0.0, rangemax = 20.0 * 100.0) # hack to allow decimals
#
# Set up the engine
#
ga = GSimpleGA.GSimpleGA(genome)
ga.setPopulationSize(1000) # set the population to 1000
# set a selector
ga.selector.set(Selectors.GRouletteWheel) # set a selector - there are many in the pyevolve package
#ga.selector.set(Selectors.GTournamentSelector) # doesn't seem to make much difference here
#
# Change the scaling method
#
pop = ga.getPopulation()
pop.scaleMethod.set(Scaling.SigmaTruncScaling) # don't get this...
#
# Run the ga
#
ga.evolve() # freq_stats=10)
# pick the m and the b
mg = ga.bestIndividual()[0] / 100.0 # back to decimals
bg = ga.bestIndividual()[1] / 100.0
#
# do a numpy least square fit - a few less lines
#
x_in = numpy.vstack([x, numpy.ones(len(x))]).T
mn, bn = numpy.linalg.lstsq(x_in, yt)[0]
print '\nPyevolve - m : ', mg, ' b: ', bg
print 'Numpy - m : ', mn, ' b: ', bn
#
# plot the two results
#
fig = plot.figure(figsize = (12, 6))
plot.scatter(x, yt, color = 'r')
plot.plot(x, linear(x, mn, bn), 'b--', label = 'numpy least sq')
plot.plot(x, linear(x, mg, bg), 'c--', label = 'ga least sq')
plot.title('Comparing Approaches')
plot.legend()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment