Skip to content

Instantly share code, notes, and snippets.

@ZacCranko
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ZacCranko/3bc60d91c7a85c0398a4 to your computer and use it in GitHub Desktop.
Save ZacCranko/3bc60d91c7a85c0398a4 to your computer and use it in GitHub Desktop.
using ForwardDiff
norm2(x::Vector) = dot(x,x) # define square norm function
function linesearch(z,
p,
grad,
B,
Δ,
c1::Real=1e-4,
c2::Real=0.9,
rho::Real=0.9,
maxiter::Int=1000)
# initialise a
a = norm2(p)
b = 2*dot(z,p)
c = norm2(z) - Δ^2
a = (-b + sqrt(b^2-4*a*c))/(2*a)
(isnan(a) || a < 0) && warn("Step size, $a, is screwed")
w1 = w2 = false
i = 0
m(s) = dot(s,grad) + 0.5*dot(s, B*s)
∇m(s) = grad + B*s
while !(w1 && w2) # While the Wolfe conditions aren't met
i += 1
s = z + a*p
# Strong Wolfe rules
w1 = (m(s) <= m(z) + c1*a*dot(p,∇m(z)))
w2 = (abs(dot(p,∇m(s))) <= c2*abs(dot(p,∇m(z))))
a *= rho
i > maxiter && (warn("Backtrack line search exceeds maxiter"); return z + 0.1*p)
end
s = z + a*p
norm(s) > Δ && warn("Outside of trust region")
return s
end
function steinhaug(grad::Vector, B::Matrix, Δ::Real; e=1e-6)
z, r = zero(grad), grad
p = -r
for i = 1:length(p)
(dot(p,B*p) <= 0) && return linesearch(z,p,grad,B,Δ)
a = norm2(r)/dot(p,B*p)
s = z + a*p
(norm(s) > Δ) && return linesearch(z,p,grad,B,Δ)
z = s
r0 = r; r += a*B*p
(norm(r) < e) && return s
p = (norm2(r)/norm2(r0))*p - r
end
end
function trust_region(f::Function, x::Vector; e=10e-6, maxiter=100, initial_region=0.1)
grad = forwarddiff_gradient(f, Float64, fadtype=:typed) # build jacobian
hes = forwarddiff_hessian(f, Float64, fadtype=:typed) # build hessian
# 0 < γ2 < 1 < γ1 region size controls
γ1, γ2 = 1.5, 0.5
# 0 < η2 < η1 < 1 how good our step is
η1, η2 = 0.75, 0.25
Δ = initial_region
g = grad(x); i=0
while (norm(g) > e) && (i < maxiter)
i+=1
g, B = grad(x), hes(x)
s = steinhaug(g, B, Δ)
ρ = -(f(x)-f(x+s))/(s⋅g-0.5dot(s,B*s))
if ρ >= η1 # very successful step
x += s
Δ = min(γ1*Δ,0.99) # enlarge region
elseif ρ >= η2 # successful step
x += s
else # bad step
Δ = max(γ2*Δ,0.01) # reduce region
end
@show i, Δ, f(x)
end
x
end
f(x::Vector) = 100(x[2]-x[1]^2)^2 + (1-x[1])^2
x0 = zeros(2)
@time x = trust_region(f,x0; initial_region=0.5)
@show f(x)
@show x
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment