ZacCranko/Steihaug-Cg.jl

## Steihaug-Cg.jl
using ForwardDiff

norm2(x::Vector) = dot(x,x) # define square norm function

function linesearch(z,
                    p,
                    grad,
                    B,
                    Δ,
                    c1::Real=1e-4,
                    c2::Real=0.9,
                    rho::Real=0.9,
                    maxiter::Int=1000)
    # initialise a
    a = norm2(p)
    b = 2*dot(z,p)
    c = norm2(z) - Δ^2
    a = (-b + sqrt(b^2-4*a*c))/(2*a)
    (isnan(a) || a < 0) && warn("Step size, $a, is screwed")

    w1 = w2 = false
    i = 0

    m(s)  = dot(s,grad) + 0.5*dot(s, B*s)
    ∇m(s) = grad + B*s

    while !(w1 && w2) # While the Wolfe conditions aren't met
        i += 1
        s = z + a*p

        # Strong Wolfe rules
        w1 = (m(s) <= m(z) + c1*a*dot(p,∇m(z)))
        w2 = (abs(dot(p,∇m(s))) <= c2*abs(dot(p,∇m(z))))

        a *= rho
        i > maxiter && (warn("Backtrack line search exceeds maxiter"); return z + 0.1*p)
    end

    s = z + a*p
    norm(s) > Δ && warn("Outside of trust region")
    return s
end

function steinhaug(grad::Vector, B::Matrix, Δ::Real; e=1e-6)
    z, r = zero(grad), grad
    p = -r

    for i = 1:length(p)
        (dot(p,B*p) <= 0) && return linesearch(z,p,grad,B,Δ)

        a = norm2(r)/dot(p,B*p)
        s = z + a*p

        (norm(s) > Δ) && return linesearch(z,p,grad,B,Δ)

        z = s
        r0 = r; r += a*B*p
        (norm(r) < e) && return s

        p = (norm2(r)/norm2(r0))*p - r
    end
end

function trust_region(f::Function, x::Vector; e=10e-6, maxiter=100, initial_region=0.1)
    grad = forwarddiff_gradient(f, Float64, fadtype=:typed) # build jacobian
    hes = forwarddiff_hessian(f, Float64, fadtype=:typed)   # build hessian

    # 0 < γ2 < 1 < γ1 region size controls
    γ1, γ2 = 1.5, 0.5

    # 0 < η2 < η1 < 1 how good our step is
    η1, η2 = 0.75, 0.25

    Δ = initial_region
    g = grad(x); i=0

    while (norm(g) > e) && (i < maxiter)
        i+=1
        g, B = grad(x), hes(x)
        s = steinhaug(g, B, Δ)
        ρ = -(f(x)-f(x+s))/(s⋅g-0.5dot(s,B*s))

        if ρ >= η1 # very successful step
            x += s
            Δ = min(γ1*Δ,0.99) # enlarge region
        elseif ρ >= η2 # successful step
            x += s
        else  # bad step
            Δ = max(γ2*Δ,0.01) # reduce region
        end
        @show i, Δ, f(x)
    end
    x
end

f(x::Vector) = 100(x[2]-x[1]^2)^2 + (1-x[1])^2
x0 = zeros(2)

@time x = trust_region(f,x0; initial_region=0.5)
@show f(x)
@show x
	using ForwardDiff

	norm2(x::Vector) = dot(x,x) # define square norm function

	function linesearch(z,
	p,
	grad,
	B,
	Δ,
	c1::Real=1e-4,
	c2::Real=0.9,
	rho::Real=0.9,
	maxiter::Int=1000)
	# initialise a
	a = norm2(p)
	b = 2*dot(z,p)
	c = norm2(z) - Δ^2
	a = (-b + sqrt(b^2-4ac))/(2*a)
	(isnan(a) \|\| a < 0) && warn("Step size, $a, is screwed")

	w1 = w2 = false
	i = 0

	m(s) = dot(s,grad) + 0.5dot(s, Bs)
	∇m(s) = grad + B*s

	while !(w1 && w2) # While the Wolfe conditions aren't met
	i += 1
	s = z + a*p

	# Strong Wolfe rules
	w1 = (m(s) <= m(z) + c1adot(p,∇m(z)))
	w2 = (abs(dot(p,∇m(s))) <= c2*abs(dot(p,∇m(z))))

	a *= rho
	i > maxiter && (warn("Backtrack line search exceeds maxiter"); return z + 0.1*p)
	end

	s = z + a*p
	norm(s) > Δ && warn("Outside of trust region")
	return s
	end

	function steinhaug(grad::Vector, B::Matrix, Δ::Real; e=1e-6)
	z, r = zero(grad), grad
	p = -r

	for i = 1:length(p)
	(dot(p,B*p) <= 0) && return linesearch(z,p,grad,B,Δ)

	a = norm2(r)/dot(p,B*p)
	s = z + a*p

	(norm(s) > Δ) && return linesearch(z,p,grad,B,Δ)

	z = s
	r0 = r; r += aBp
	(norm(r) < e) && return s

	p = (norm2(r)/norm2(r0))*p - r
	end
	end

	function trust_region(f::Function, x::Vector; e=10e-6, maxiter=100, initial_region=0.1)
	grad = forwarddiff_gradient(f, Float64, fadtype=:typed) # build jacobian
	hes = forwarddiff_hessian(f, Float64, fadtype=:typed) # build hessian

	# 0 < γ2 < 1 < γ1 region size controls
	γ1, γ2 = 1.5, 0.5

	# 0 < η2 < η1 < 1 how good our step is
	η1, η2 = 0.75, 0.25

	Δ = initial_region
	g = grad(x); i=0

	while (norm(g) > e) && (i < maxiter)
	i+=1
	g, B = grad(x), hes(x)
	s = steinhaug(g, B, Δ)
	ρ = -(f(x)-f(x+s))/(s⋅g-0.5dot(s,B*s))

	if ρ >= η1 # very successful step
	x += s
	Δ = min(γ1*Δ,0.99) # enlarge region
	elseif ρ >= η2 # successful step
	x += s
	else # bad step
	Δ = max(γ2*Δ,0.01) # reduce region
	end
	@show i, Δ, f(x)
	end
	x
	end

	f(x::Vector) = 100(x[2]-x[1]^2)^2 + (1-x[1])^2
	x0 = zeros(2)

	@time x = trust_region(f,x0; initial_region=0.5)
	@show f(x)
	@show x