@ojwoodford
Last active October 26, 2023 05:00
Comparison of non-linear least squares optimizers in Julia, using Rosenbrock-like functions of various dimensions
# Compare non-linear least squares solvers on the multi-dimensional Rosenbrock function
startpoint(n) = Float64[i % 2 == 0 ? 1.0 : -1.2 for i = 1:n]
const diagonal_weight_dense = 1.2
const offdiagonal_weight_dense = 1.0
const diagonal_weight_sparse = 1.0
const offdiagonal_weight_sparse = 10.0
################################################################################
# NLLS formulation
using NLLSsolver, Static, StaticArrays
struct RosenbrockDiag <: NLLSsolver.AbstractResidual
    weight::Float64
    varind::Int
end
Base.eltype(::RosenbrockDiag) = Float64
NLLSsolver.ndeps(::RosenbrockDiag) = static(1) # Residual depends on 1 variable
NLLSsolver.nres(::RosenbrockDiag) = static(1) # Residual has length 1
NLLSsolver.varindices(res::RosenbrockDiag) = res.varind
NLLSsolver.getvars(res::RosenbrockDiag, vars::Vector) = (vars[res.varind]::Float64, )
NLLSsolver.computeresidual(res::RosenbrockDiag, x) = res.weight * (x - 1)
struct RosenbrockOffDiag <: NLLSsolver.AbstractResidual
    weight::Float64
    varind::SVector{2, Int}
end
RosenbrockOffDiag(w, j, k) = RosenbrockOffDiag(w, SVector(j, k))
Base.eltype(::RosenbrockOffDiag) = Float64
NLLSsolver.ndeps(::RosenbrockOffDiag) = static(2) # Residual depends on 2 variables
NLLSsolver.nres(::RosenbrockOffDiag) = static(1) # Residual has length 1
NLLSsolver.varindices(res::RosenbrockOffDiag) = res.varind
NLLSsolver.getvars(res::RosenbrockOffDiag, vars::Vector) = (vars[res.varind[1]]::Float64, vars[res.varind[2]]::Float64)
NLLSsolver.computeresidual(res::RosenbrockOffDiag, x, y) = res.weight * (y - x ^ 2)
function nllssolver_sparse(n::Int)
    costs = NLLSsolver.CostStruct{Union{RosenbrockDiag, RosenbrockOffDiag}}()
    costs.data[RosenbrockDiag] = RosenbrockDiag[RosenbrockDiag(diagonal_weight_sparse, k) for k = 1:n]
    costs.data[RosenbrockOffDiag] = RosenbrockOffDiag[RosenbrockOffDiag(offdiagonal_weight_sparse, k, k+1) for k = 1:n-1]
    return NLLSProblem(startpoint(n), costs)
end
function nllssolver_dense(n::Int)
    costs = NLLSsolver.CostStruct{Union{RosenbrockDiag, RosenbrockOffDiag}}()
    costs.data[RosenbrockDiag] = RosenbrockDiag[RosenbrockDiag(diagonal_weight_dense, k) for k = 1:n]
    costs.data[RosenbrockOffDiag] = [RosenbrockOffDiag(offdiagonal_weight_dense, j, k) for j = 1:n-1 for k = j+1:n]
    return NLLSProblem(startpoint(n), costs)
end
function optimize(model::NLLSProblem)
    result = NLLSsolver.optimize!(model, NLLSOptions(maxiters = 30000))
    return result.niterations, result.startcost, result.bestcost
end
################################################################################
################################################################################
# JuMP formulation
using JuMP, Ipopt
import MathOptInterface as MOI
function ipopt_sparse(n::Int)
    model = Model(Ipopt.Optimizer)
    x0 = startpoint(n)
    @variable(model, x[i in 1:n], start = x0[i])
    # Model each residual as an auxiliary variable constrained to equal the residual
    # expression, so Ipopt sees the quadratic/linear structure of the constraints directly
    @variable(model, residuals[1:2*n-1])
    @constraints(model, begin
        [k in 1:(n - 1)], residuals[k] == offdiagonal_weight_sparse * (x[k + 1] - x[k]^2)
        [k in 1:n], residuals[n-1+k] == diagonal_weight_sparse * (x[k] - 1)
    end)
    @objective(model, Min, sum(residuals .^ 2))
    set_silent(model)
    return model
end
function ipopt_dense(n::Int)
    model = Model(Ipopt.Optimizer)
    x0 = startpoint(n)
    @variable(model, x[i = 1:n], start = x0[i])
    len = n + ((n - 1) * n) >> 1
    @variable(model, residuals[1:len])
    i = 0
    for j in 1:n-1
        for k in j+1:n
            i += 1
            @constraint(model, residuals[i] == offdiagonal_weight_dense * (x[k] - x[j]^2))
        end
    end
    @constraint(model, [k in 1:n], residuals[i+k] == diagonal_weight_dense * (x[k] - 1))
    @objective(model, Min, sum(residuals .^ 2))
    set_silent(model)
    return model
end
function optimize(model::JuMP.Model)
    JuMP.optimize!(model)
    return MOI.get(model, MOI.BarrierIterations()), NaN64, objective_value(model)
end
################################################################################
################################################################################
# JSO formulation
using JSOSolvers, NLPModels, NLPModelsJuMP
struct JSOModel{T}
    model::MathOptNLSModel
    solver::T
end
function jso_sparse(n::Int, solver)
    model = Model()
    x0 = startpoint(n)
    @variable(model, x[i = 1:n], start = x0[i])
    @NLexpression(model, FA[k = 1:(n - 1)], offdiagonal_weight_sparse * (x[k + 1] - x[k]^2))
    @expression(model, FB[k = 1:n], diagonal_weight_sparse * (x[k] - 1))
    return JSOModel(MathOptNLSModel(model, [FA; FB]), solver)
end
function jso_dense(n::Int, solver)
    model = Model()
    x0 = startpoint(n)
    @variable(model, x[i = 1:n], start = x0[i])
    FA = Any[]
    for j = 1:n-1
        for k = j+1:n
            push!(FA, @NLexpression(model, offdiagonal_weight_dense * (x[k] - x[j]^2)))
        end
    end
    @expression(model, FB[k = 1:n], diagonal_weight_dense * (x[k] - 1))
    return JSOModel(MathOptNLSModel(model, [FA; FB]), solver)
end
function optimize(model::JSOModel)
    result = model.solver(model.model)
    return result.iter, NaN64, result.objective
end
################################################################################
################################################################################
# LeastSquaresOptim formulation
import LeastSquaresOptim, SparseArrays
# Residuals for the sparse problem, interleaved as (diagonal, off-diagonal) pairs per index
function rosenbrock_sparse!(out, x)
    @inbounds for k = 1:length(x)-1
        out[2*k-1] = diagonal_weight_sparse * (x[k] - 1)
        out[2*k] = offdiagonal_weight_sparse * (x[k + 1] - x[k]^2)
    end
    out[end] = diagonal_weight_sparse * (x[end] - 1)
end
# Analytic Jacobian for the sparse problem; nzval is filled in the CSC (column-major)
# order implied by the sparsity pattern constructed in lso_sparse
function rosenbrockJ!(out, x)
    @inbounds for k = 1:length(x)-1
        out.nzval[3*k-2] = diagonal_weight_sparse
        out.nzval[3*k-1] = -2 * offdiagonal_weight_sparse * x[k]
        out.nzval[3*k] = offdiagonal_weight_sparse
    end
    out.nzval[end] = diagonal_weight_sparse
end
# Residuals for the dense problem: n diagonal terms followed by all off-diagonal pairs
function rosenbrock_dense!(out, x)
    n = length(x)
    @inbounds for k = 1:n
        out[k] = diagonal_weight_dense * (x[k] - 1)
    end
    i = n + 1
    @inbounds for j = 1:n-1
        xj2 = x[j] ^ 2
        for k = j+1:n
            out[i] = offdiagonal_weight_dense * (x[k] - xj2)
            i += 1
        end
    end
end
function lso_sparse(n::Int)
    len = 2 * n - 1
    # Sparsity pattern: rows (2k-1, 2k, 2k) in columns (k, k, k+1) for each chain term,
    # plus the final diagonal residual at (len, n)
    rows = vcat(collect(Iterators.flatten([(2*k-1, 2*k, 2*k) for k in 1:(n-1)])), len)
    cols = vcat(collect(Iterators.flatten([(k, k, k+1) for k in 1:(n-1)])), n)
    J = SparseArrays.sparse(rows, cols, zeros(3*n-2), len, n)
    return LeastSquaresOptim.LeastSquaresProblem(; x=startpoint(n), f! = rosenbrock_sparse!, g! = rosenbrockJ!, J=J, output_length=len)
end
lso_dense(n::Int) = LeastSquaresOptim.LeastSquaresProblem(; x=startpoint(n), f! = rosenbrock_dense!, output_length=n+((n-1)*n)>>1, autodiff=:forward)
function optimize(model::LeastSquaresOptim.LeastSquaresProblem)
    result = LeastSquaresOptim.optimize!(model, LeastSquaresOptim.LevenbergMarquardt(); iterations=30000)
    return result.iterations, NaN64, result.ssr * 0.5
end
################################################################################
################################################################################
# Run the test and display results
using Plots, Printf
tickformatter(x) = @sprintf("%g", x)
function runtest(name, sizes, solvers)
    result = Matrix{Float64}(undef, (4, length(sizes)))
    p = plot()
    # For each optimizer
    for (label, constructor) in solvers
        # First run the optimization to compile everything
        optimize(constructor(sizes[1]))
        optimize(constructor(sizes[end])) # Do largest and smallest in case there are dense and sparse code paths
        # Go over each problem, recording the time, iterations and start and end cost
        for (i, n) in enumerate(sizes)
            # Construct the problem
            model = constructor(n)
            # Optimize
            result[1,i] = @elapsed res = optimize(model)
            result[2,i] = res[1]
            result[3,i] = res[2]
            result[4,i] = res[3]
            if res[3] > 1.e-10
                cost = res[3]
                println("$label on size $n only converged to a cost of $cost")
                result[1,i] = NaN64
            end
        end
        # Plot the graphs
        plot!(p, vec(sizes), vec(result[1,:]), label=label)
    end
    yaxis!(p, minorgrid=true, formatter=tickformatter)
    xaxis!(p, minorgrid=true, formatter=tickformatter)
    plot!(p, legend=:topleft, yscale=:log10, xscale=:log2)
    title!(p, "Speed comparison: $name")
    xlabel!(p, "Problem size")
    ylabel!(p, "Optimization time (s)")
    display(p)
end
################################################################################
runtest("dense problem", [2 4 8 16 32 64], ["Ipopt" => ipopt_dense, "JSO trunk" => n->jso_dense(n, trunk), "JSO tron" => n->jso_dense(n, tron), "LeastSquaresOptim LM" => lso_dense, "NLLSsolver LM" => nllssolver_dense])
runtest("sparse problem", [16 46 128 362 1024 2896], ["Ipopt" => ipopt_sparse, "JSO trunk" => n->jso_sparse(n, trunk), "JSO tron" => n->jso_sparse(n, tron), "LeastSquaresOptim LM" => lso_sparse, "NLLSsolver LM" => nllssolver_sparse])
@odow commented Oct 12, 2023

I'd write the JuMP model as

function ipopt_sparse(n::Int)
    model = Model(Ipopt.Optimizer)
    x0 = startpoint(n)
    @variable(model, x[i in 1:n], start = x0[i])
    @variable(model, residuals[1:2*n-1])
    @constraints(model, begin
        [k in 1:(n - 1)], residuals[k] == offdiagonal_weight_sparse * (x[k + 1] - x[k]^2) 
        [k in 1:n], residuals[n-1+k] == diagonal_weight_sparse * (x[k] - 1)
    end)
    @objective(model, Min, sum(residuals .^ 2))
    set_time_limit_sec(model, 30.0)
    set_silent(model)
    return model
end

@ojwoodford (Author)

Thanks, @odow. Is there any computational benefit to this, or is it just a style thing? The dense problem is a bit trickier. Any thoughts on that?

@odow commented Oct 12, 2023

Is there any computational benefit to this

Ipopt gets to see that one set of constraints is quadratic and the other is linear. Using @NLconstraint forces it to use automatic differentiation to compute gradients.
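
For comparison, here is a minimal, untested sketch of what the legacy @NLconstraint version of ipopt_sparse might look like (the ipopt_sparse_nl name is purely illustrative; constants and startpoint are as defined at the top of the gist):

    function ipopt_sparse_nl(n::Int)
        model = Model(Ipopt.Optimizer)
        x0 = startpoint(n)
        @variable(model, x[i in 1:n], start = x0[i])
        @variable(model, residuals[1:2*n-1])
        # These constraints go through JuMP's nonlinear interface, so Ipopt no longer
        # sees that they are quadratic/linear and relies on automatic differentiation
        @NLconstraint(model, [k in 1:(n - 1)], residuals[k] == offdiagonal_weight_sparse * (x[k + 1] - x[k]^2))
        @NLconstraint(model, [k in 1:n], residuals[n-1+k] == diagonal_weight_sparse * (x[k] - 1))
        @objective(model, Min, sum(residuals .^ 2))
        set_silent(model)
        return model
    end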

Any thoughts on that?

Didn't test or benchmark, but something like this:

function ipopt_dense(n::Int)
    model = Model(Ipopt.Optimizer)
    x0 = startpoint(n)
    @variable(model, x[i = 1:n], start = x0[i])
    len = n + ((n - 1) * n) >> 1
    @variable(model, residuals[1:len])
    i = 0
    for j in 1:(n - 1)
        for k in j+1:n
            i += 1
            @constraint(model, residuals[i] == offdiagonal_weight_dense * (x[k] - x[j]^2))
        end
    end
    @constraint(model, [k in 1:n], residuals[i+k] == diagonal_weight_dense * (x[k] - 1))
    @objective(model, Min, sum(residuals .^ 2))
    set_time_limit_sec(model, 90.0)
    set_silent(model)
    return model
end

@ojwoodford (Author)

@odow 🙏 Code and plots updated

@pkofod commented Oct 13, 2023

Is it the second variant here? https://en.wikipedia.org/wiki/Rosenbrock_function

@ojwoodford (Author)

@pkofod The sparse problem is almost identical. The dense problem is different - it generates a fully dense Hessian. You can see the cost definition for the sparse cost on line 141, and the dense cost on line 158.
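
For reference, a minimal sketch of the two residual sets being minimized (the function names below are illustrative, not part of the gist): the sparse set is the chained Rosenbrock form, up to the choice of weights, while the dense set couples every pair j < k, which is what makes its Jacobian and Hessian fully dense.

    # Illustrative only: the residual vectors corresponding to the two problems above
    sparse_residuals(x) = vcat(
        [offdiagonal_weight_sparse * (x[k+1] - x[k]^2) for k in 1:length(x)-1],  # chain terms (banded Jacobian)
        [diagonal_weight_sparse * (x[k] - 1) for k in 1:length(x)])              # anchor terms
    dense_residuals(x) = vcat(
        [offdiagonal_weight_dense * (x[k] - x[j]^2) for j in 1:length(x)-1 for k in j+1:length(x)],  # all pairs j < k
        [diagonal_weight_dense * (x[k] - 1) for k in 1:length(x)])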
