Created
July 18, 2014 13:08
-
-
Save treviesweets/dfde205260c75d10f16b to your computer and use it in GitHub Desktop.
Line Fitting With Pyevolve
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### finding the best fit line for data with a random error ### | |
# | |
# a simplification of the example given at http://acodersmusings.blogspot.co.uk/2009/07/curve-fitting-with-pyevolve.html | |
# | |
# using a genetic algorithm to infer the parameters of a model - the simplest form possible, a straight line fit. | |
# | |
from pyevolve import G1DList, GSimpleGA, Selectors, Scaling, DBAdapters | |
import numpy | |
import matplotlib.pyplot as plot | |
# | |
# Create Random Data | |
# | |
N = 100 | |
x = numpy.random.rand(N) | |
err = numpy.random.normal(0, 3, N) # some noise | |
# set the actual parameters | |
m = 10.0 | |
b = 5.0 | |
# a simple linear function | |
def linear(x, m, b): | |
y = m * x + b | |
return y | |
# create some noisy test data | |
yt = linear(x, m, b) + err | |
# wrap up the sample points | |
sample_points = [x, yt] | |
# create a generator function that tests the goodness of fit for each chromosome | |
def generate_obj_fcn(sample_points): | |
# the objective function just calculates the square error | |
def obj_fcn(chromosome): | |
""" calculate the squared error of this chromosomes' parameters | |
""" | |
# unpack the m (slope) + b (intercept) from the chromosome | |
m_p = chromosome[0] / 100.0 # hack to allow decimals | |
b_p = chromosome[1] / 100.0 | |
# unpack the sample points | |
x = sample_points[0] | |
yt = sample_points[1] | |
# calculate the predicted y's | |
yp = linear(x, m_p, b_p) | |
# calculate the square error for each point | |
deltas = ( yp - yt ) ** 2 | |
# calculate the score | |
# note the '-' the ga looks for the highest score | |
score = -sum(deltas) | |
return score | |
return obj_fcn | |
# | |
# Create the genome | |
# | |
genome = G1DList.G1DList(2) # set up a list with 2 points 1 for m and 1 for b | |
genome.evaluator.set(generate_obj_fcn(sample_points)) | |
genome.setParams(rangemin = 0.0, rangemax = 20.0 * 100.0) # hack to allow decimals | |
# | |
# Set up the engine | |
# | |
ga = GSimpleGA.GSimpleGA(genome) | |
ga.setPopulationSize(1000) # set the population to 1000 | |
# set a selector | |
ga.selector.set(Selectors.GRouletteWheel) # set a selector - there are many in the pyevolve package | |
#ga.selector.set(Selectors.GTournamentSelector) # doesn't seem to make much difference here | |
# | |
# Change the scaling method | |
# | |
pop = ga.getPopulation() | |
pop.scaleMethod.set(Scaling.SigmaTruncScaling) # don't get this... | |
# | |
# Run the ga | |
# | |
ga.evolve() # freq_stats=10) | |
# pick the m and the b | |
mg = ga.bestIndividual()[0] / 100.0 # back to decimals | |
bg = ga.bestIndividual()[1] / 100.0 | |
# | |
# do a numpy least square fit - a few less lines | |
# | |
x_in = numpy.vstack([x, numpy.ones(len(x))]).T | |
mn, bn = numpy.linalg.lstsq(x_in, yt)[0] | |
print '\nPyevolve - m : ', mg, ' b: ', bg | |
print 'Numpy - m : ', mn, ' b: ', bn | |
# | |
# plot the two results | |
# | |
fig = plot.figure(figsize = (12, 6)) | |
plot.scatter(x, yt, color = 'r') | |
plot.plot(x, linear(x, mn, bn), 'b--', label = 'numpy least sq') | |
plot.plot(x, linear(x, mg, bg), 'c--', label = 'ga least sq') | |
plot.title('Comparing Approaches') | |
plot.legend() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment