Last active
April 19, 2024 21:30
-
-
Save MarcinKonowalczyk/3bf7398fd2f2be9fdeb54befe854a305 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"math" | |
"math/rand" | |
) | |
// makeData generates N random points on the XY plane whose Z values follow
// Z = (X - xo)^2 + (Y - yo)^2 plus normally distributed noise, where
// (xo, yo) = (p[0], p[1]).
//
// X and Y are sampled uniformly from [0, 10). The noise term is
// rand.NormFloat64() scaled by noise, so noise == 0 yields exact values.
// Each returned element is a []float64{x, y, z} triple.
//
// Panics if p does not have exactly two elements.
func makeData(p []float64, N int, noise float64) [][]float64 {
	if len(p) != 2 {
		panic("makeData: p must have length 2")
	}
	xo, yo := p[0], p[1]
	XYZ := make([][]float64, 0, N)
	for i := 0; i < N; i++ {
		// Randomly sample a point in the XY plane.
		x, y := rand.Float64()*10, rand.Float64()*10
		// Normally distributed noise, scaled by the caller's amplitude.
		n := rand.NormFloat64() * noise
		// Assemble the point according to the function. The data generating
		// process does not have to be the same as the one we use to fit the
		// data, but if it is far from the truth we will have a hard time
		// fitting it. An alternative with a cross term:
		//   z := (x-xo)*(x-xo) + (y-yo)*(y-yo) + 0.1*(x-xo)*(y-yo) + n
		z := (x-xo)*(x-xo) + (y-yo)*(y-yo) + n
		XYZ = append(XYZ, []float64{x, y, z})
	}
	return XYZ
}
func main() { | |
// Make some random data. | |
p_actual := []float64{11.4, 3.1415} // Actual parameters | |
XYZ := makeData(p_actual, 10, 1.0) | |
fmt.Println("Actual parameters: ", p_actual) | |
// ------- | |
// Beyond this line we pretend we don't know the data generating process, | |
// and we try to fit the data | |
optimizer := NewOptimizer() | |
// Initial guess for the parameters. | |
p0 := []float64{0, 0} | |
// Loss function tolerance. If the loss function changes by less than this, we stop. | |
eps := 0.001 | |
// The scale of the problem to use for the initial step. | |
// It is best if all the parameters are of the same order of magnitude. | |
scale := 0.1 | |
// Model we want to fit. For each x, y point and a set of parameters p, | |
// we want to compute the value z = f(x, y | p) | |
model := func(p []float64, x, y float64) float64 { | |
return (x-p[0])*(x-p[0]) + (y-p[1])*(y-p[1]) | |
} | |
// Objective function | |
N := 0 | |
L := []float64{} | |
objfunc := func(p []float64) float64 { | |
// Compute the loss (in this case sum of the squared residuals) | |
loss := 0.0 | |
for _, xyz := range XYZ { | |
x, y, z := xyz[0], xyz[1], xyz[2] | |
value := model(p, x, y) | |
delta := value - z | |
loss += delta * delta | |
} | |
// We can keep track of some things here, like the number of iterations | |
// Or the value of the loss function ads the iterations progress | |
// We don't have to do this, but it can be useful for debugging/monitoring | |
N++ | |
L = append(L, loss) | |
return loss | |
} | |
// Actually run the optimizer | |
min, p := optimizer.Optimize(objfunc, p0, eps, scale) | |
// Parameter delta (from the actual parameters) | |
pd := []float64{p[0] - p_actual[0], p[1] - p_actual[1]} | |
// Print the results | |
fmt.Println("Optimization complete") | |
fmt.Println("Number of iterations:", N) | |
fmt.Println("Initial guess:", p0) | |
fmt.Println("Optimal parameters:", p) | |
fmt.Println("Parameter delta:", pd) | |
fmt.Println("Loss function value:", min) | |
} | |
//========================================================================= | |
// | |
// ##### ##### ###### ## ### ### ## ###### ##### ##### | |
// ## ## ## ## ## ## ## # # ## ## ## ## ## ## | |
// ## ## ##### ## ## ## ## ## ## ## ##### ##### | |
// ## ## ## ## ## ## ## ## ## ## ## ## | |
// ##### ## ## ## ## ## ## ###### ##### ## ## | |
// | |
//========================================================================= | |
// Implementation of the Nelder-Mead simplex method for function minimization. | |
// Based on old implementation in the influxdb project: | |
// https://github.com/influxdata/influxdb/blob/1.7.9-dev/query/neldermead/neldermead.go | |
// Default Nelder-Mead settings. The coefficient values are the standard
// textbook choices (alpha = 1, beta = 0.5, gamma = 2).
const (
	// Hard cap on main-loop iterations in case convergence is never reached.
	defaultMaxIterations = 1000
	// reflection coefficient
	defaultAlpha = 1.0
	// contraction coefficient
	defaultBeta = 0.5
	// expansion coefficient
	defaultGamma = 2.0
)
// Optimizer holds the tuning parameters for the Nelder-Mead simplex method.
type Optimizer struct {
	// MaxIterations caps the number of main-loop iterations.
	MaxIterations int
	// Alpha is the reflection coefficient.
	Alpha float64
	// Beta is the contraction coefficient.
	Beta float64
	// Gamma is the expansion coefficient.
	Gamma float64
}
// New returns a new instance of Optimizer with all values set to the defaults. | |
func NewOptimizer() *Optimizer { | |
return &Optimizer{ | |
MaxIterations: defaultMaxIterations, | |
Alpha: defaultAlpha, | |
Beta: defaultBeta, | |
Gamma: defaultGamma, | |
} | |
} | |
// Optimize applies the Nelder-Mead simplex method with the Optimizer's
// settings to minimize objfunc.
//
// start is the initial parameter guess; epsilon is the convergence tolerance
// on the spread (standard deviation) of the simplex's function values; scale
// sizes the initial simplex and works best when all parameters share the
// same order of magnitude.
//
// It returns the minimum function value found and the corresponding
// parameter vector. The search stops when the vertex-value spread drops
// below epsilon or after MaxIterations iterations, whichever comes first.
func (o *Optimizer) Optimize(
	objfunc func([]float64) float64,
	start []float64,
	epsilon,
	scale float64,
) (float64, []float64) {
	n := len(start)

	// Holds the n+1 vertices of the simplex, each a point in R^n.
	v := make([][]float64, n+1)
	for i := range v {
		v[i] = make([]float64, n)
	}
	// Value of function at each vertex.
	f := make([]float64, n+1)
	// Reflection point coordinates.
	vr := make([]float64, n)
	// Expansion point coordinates.
	ve := make([]float64, n)
	// Contraction point coordinates.
	vc := make([]float64, n)
	// Centroid coordinates (of all vertices except the worst).
	vm := make([]float64, n)

	// Create the initial simplex around the starting point. pn is the
	// offset along a vertex's own axis, qn the offset along the others;
	// together they make a regular simplex with edge length `scale`.
	pn := scale * (math.Sqrt(float64(n+1)) - 1 + float64(n)) / (float64(n) * math.Sqrt(2))
	qn := scale * (math.Sqrt(float64(n+1)) - 1) / (float64(n) * math.Sqrt(2))
	for i := 0; i < n; i++ {
		v[0][i] = start[i]
	}
	for i := 1; i <= n; i++ {
		for j := 0; j < n; j++ {
			if i-1 == j {
				v[i][j] = pn + start[j]
			} else {
				v[i][j] = qn + start[j]
			}
		}
	}

	// Find the initial function values.
	for j := 0; j <= n; j++ {
		f[j] = objfunc(v[j])
	}

	// Begin the main loop of the minimization.
	for itr := 1; itr <= o.MaxIterations; itr++ {
		// Find the indexes of the largest (vg, worst vertex) and smallest
		// (vs, best vertex) function values.
		vg := 0
		vs := 0
		for i := 0; i <= n; i++ {
			if f[i] > f[vg] {
				vg = i
			}
			if f[i] < f[vs] {
				vs = i
			}
		}
		// Find the index of the second largest value.
		vh := vs
		for i := 0; i <= n; i++ {
			if f[i] > f[vh] && f[i] < f[vg] {
				vh = i
			}
		}

		// Calculate the centroid of all vertices except the worst one.
		for i := 0; i <= n-1; i++ {
			cent := 0.0
			for m := 0; m <= n; m++ {
				if m != vg {
					cent += v[m][i]
				}
			}
			vm[i] = cent / float64(n)
		}

		// Reflect the worst vertex vg through the centroid to vr.
		for i := 0; i <= n-1; i++ {
			vr[i] = vm[i] + o.Alpha*(vm[i]-v[vg][i])
		}
		// Value of function at reflection point.
		fr := objfunc(vr)

		// Reflection is neither best nor among the two worst: accept it.
		if fr < f[vh] && fr >= f[vs] {
			for i := 0; i <= n-1; i++ {
				v[vg][i] = vr[i]
			}
			f[vg] = fr
		}

		// Reflection beat the current best: investigate a step further in
		// this direction (expansion).
		if fr < f[vs] {
			for i := 0; i <= n-1; i++ {
				ve[i] = vm[i] + o.Gamma*(vr[i]-vm[i])
			}
			// Value of function at expansion point.
			fe := objfunc(ve)
			// By making fe < fr as opposed to fe < f[vs], Rosenbrock's
			// function takes 63 iterations as opposed to 64 when using
			// double variables.
			if fe < fr {
				for i := 0; i <= n-1; i++ {
					v[vg][i] = ve[i]
				}
				f[vg] = fe
			} else {
				for i := 0; i <= n-1; i++ {
					v[vg][i] = vr[i]
				}
				f[vg] = fr
			}
		}

		// Reflection no better than second-worst: a contraction is needed.
		// (Mutually exclusive with the two acceptance branches above, since
		// both require fr < f[vh].)
		if fr >= f[vh] {
			if fr < f[vg] && fr >= f[vh] {
				// Perform outside contraction (toward the reflection point).
				for i := 0; i <= n-1; i++ {
					vc[i] = vm[i] + o.Beta*(vr[i]-vm[i])
				}
			} else {
				// Perform inside contraction (toward the worst vertex).
				for i := 0; i <= n-1; i++ {
					vc[i] = vm[i] - o.Beta*(vm[i]-v[vg][i])
				}
			}
			// Value of function at contraction point.
			fc := objfunc(vc)
			if fc < f[vg] {
				for i := 0; i <= n-1; i++ {
					v[vg][i] = vc[i]
				}
				f[vg] = fc
			} else {
				// At this point the contraction is not successful: we must
				// halve the distance from vs to all the vertices of the
				// simplex (shrink) and then continue.
				for row := 0; row <= n; row++ {
					if row != vs {
						for i := 0; i <= n-1; i++ {
							v[row][i] = v[vs][i] + (v[row][i]-v[vs][i])/2.0
						}
					}
				}
				// NOTE(review): only the worst and second-worst vertices are
				// re-evaluated here even though every vertex except vs just
				// moved, so the remaining f[i] entries are stale for the
				// next iteration. This is faithful to the upstream influxdb
				// implementation — confirm before "fixing".
				f[vg] = objfunc(v[vg])
				f[vh] = objfunc(v[vh])
			}
		}

		// Test for convergence: stop when the standard deviation of the
		// vertex function values falls below epsilon.
		fsum := 0.0
		for i := 0; i <= n; i++ {
			fsum += f[i]
		}
		favg := fsum / float64(n+1)
		s := 0.0
		for i := 0; i <= n; i++ {
			// Divides by n rather than n+1 (sample-style variance), as in
			// the upstream implementation.
			s += math.Pow((f[i]-favg), 2.0) / float64(n)
		}
		s = math.Sqrt(s)
		if s < epsilon {
			break
		}
	}

	// Find the index of the smallest value.
	vs := 0
	for i := 0; i <= n; i++ {
		if f[i] < f[vs] {
			vs = i
		}
	}
	parameters := make([]float64, n)
	for i := 0; i < n; i++ {
		parameters[i] = v[vs][i]
	}
	// Re-evaluates the objective at the best vertex (one extra call) rather
	// than reusing f[vs], which may be stale after a shrink step.
	min := objfunc(v[vs])
	return min, parameters
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment