#!/usr/bin/env python
'''
Pure Python implementation of some numerical optimizers
Created on Jan 21, 2011
@author Jiahao Chen
'''
from numpy import dot, eye, finfo, asarray_chkfinite, sqrt, \
    outer, zeros, isinf, arccos
from scipy.linalg import inv, norm, pinv, solve, LinAlgError
from copy import deepcopy
_epsilon = sqrt(finfo(float).eps)
#################
# Line searches #
#################
def MinpackLineSearch(f, df, x, p, df_x = None, f_x = None,
args = (), c1 = 1e-4, c2 = 0.4, amax = 50, xtol = _epsilon):
"Modified shamelessly from scipy.optimize"
    if df_x is None: df_x = df(x)
    if f_x is None: f_x = f(x)
from scipy.optimize import line_search
stp, _, _, _, _, _ = line_search(f, df, x, p, df_x, f_x, f_x, args, c1, c2, amax)
    if stp is None: #Fall back to backtracking linesearch
return BacktrackingLineSearch(f, df, x, p, df_x, f_x, args)
else:
return stp
def MoreThuenteLinesearchIterate(delta, alpha_l, alpha_u, alpha_t, alpha_min,
alpha_max, f_l, g_l, f_t, g_t, f_u, g_u, isBracketed, Verbose):
#Section 4 of More and Thuente: Pick new trial alpha
if Verbose: print 'Section 4 of More and Thuente'
    assert (not isBracketed) or min(alpha_l, alpha_u) < alpha_t < max(alpha_l, alpha_u), 'Bracketing fail'
#if g_t * (alpha_t - alpha_l) < 0.0:
# if Verbose: print 'Going uphill; reverting to last good point'
# return isBracketed, alpha_l, alpha_l, alpha_t
if alpha_min == alpha_max:
print 'Squeezed!'
return isBracketed, alpha_min, alpha_min, alpha_min
assert alpha_min < alpha_max
sgnd = dot(g_t, g_l / abs(g_l))
isBounded = False
if f_t > f_l:
if Verbose:
print """Case 1: a higher function value. \
The minimum is bracketed by [a_l, a_t]. If the cubic step is closer to \
lower bound then take it, else take average of quadratic and cubic steps."""
#Calculate cubic and quadratic steps
da = alpha_t - alpha_l
d1 = g_l + g_t + 3 * (f_l - f_t) / da
        s = max(abs(g_l), abs(g_t), abs(d1))
d2 = s * ((d1 / s) ** 2 - (g_l / s * g_t / s)) ** 0.5
if alpha_t < alpha_l:
d2 = -d2
cubp = d2 - g_l + d1
cubq = ((d2 - g_l) + d2) + g_t
if abs(cubq) > _epsilon:
r = cubp / cubq
#Minimizer of cubic that interpolates f_l, f_t, g_l, g_t
alpha_c = alpha_l + r * da
#Minimizer of quadratic that interpolates f_l, f_t, g_l
alpha_q = alpha_l + ((g_l / ((f_l - f_t) / da + g_l)) * 0.5 * da)
else:
#print 'WARNING: VERY SMALL INTERVAL'
alpha_c = alpha_q = 0.5 * (alpha_l + alpha_t) #Invalid, choose midpoint
#Calculate new trial
if Verbose:
print 'Interpolating guesses:', alpha_c, alpha_q
if abs(alpha_c - alpha_l) < abs(alpha_q - alpha_l):
alpha_tt = alpha_c
else:
alpha_tt = 0.5 * (alpha_c + alpha_q)
isBracketed = True
isBounded = True
elif sgnd < 0: #f_t <= f_l assumed by here
if Verbose:
print """Case 2: lower function value and derivatives are of opposite sign. \
The minimum is bracketed by [a_l, a_t]. If the cubic step is closer to lower \
bound then take it, else take quadratic step."""
#Calculate cubic and quadratic steps
da = alpha_t - alpha_l
d1 = g_l + g_t + 3 * (f_l - f_t) / da
        s = max(abs(g_l), abs(g_t), abs(d1))
d2 = s * ((d1 / s) ** 2 - (g_l / s * g_t / s)) ** 0.5
if alpha_t < alpha_l:
d2 = -d2
cubp = d2 - g_t + d1
cubq = ((d2 - g_t) + d2) + g_l
if abs(cubq) > _epsilon:
            r = cubp / cubq
            #Minimizer of cubic that interpolates f_l, f_t, g_l, g_t
            alpha_c = alpha_t - r * da
            #Secant step for the zero of the derivative interpolating g_l, g_t
            alpha_s = alpha_t - (g_t / (g_t - g_l)) * da
else:
#print 'WARNING: VERY SMALL INTERVAL'
alpha_s = alpha_c = 0.5 * (alpha_l + alpha_t) #Invalid, choose midpoint
#Calculate new trial
if Verbose:
print 'Interpolating guesses:', alpha_c, alpha_s
if abs(alpha_c - alpha_t) >= abs(alpha_s - alpha_t):
alpha_tt = alpha_c
else:
alpha_tt = alpha_s
isBracketed = True
isBounded = False
elif norm(g_t) <= norm(g_l): #g_t * g_l >= 0 and f_t <= f_l assumed by here
if Verbose:
print """Case 3: lower function value and derivatives are of same \
sign, and magnitude of derivative decreased.\n Cubic step used only if it tends \
to infinity in direction of step or if minimum of cubic lies beyond step. \
Otherwise cubic step is defined to be either stpmin or stpmax. Quadratic step is \
taken if it is closer to alpha_t, else take the farthest step."""
#Calculate cubic and quadratic steps
da = alpha_t - alpha_l
d1 = g_l + g_t + 3 * (f_l - f_t) / da
        s = max(abs(g_l), abs(g_t), abs(d1))
d2 = s * ((d1 / s) ** 2 - (g_l / s * g_t / s)) ** 0.5
if alpha_t < alpha_l:
d2 = -d2
cubp = d2 - g_t + d1
cubq = ((d2 - g_t) + d2) + g_l
r = cubp / cubq
#The case d2 == 0 arises only if cubic does not tend to infinity in
#direction of step
        if (r < 0. and abs(d2) > _epsilon):
alpha_c = alpha_t - r * da
elif alpha_t > alpha_l:
alpha_c = alpha_max
else:
alpha_c = alpha_min
        #Secant step for the zero of the derivative interpolating g_l, g_t
        alpha_s = alpha_t - (g_t / (g_t - g_l)) * da
if isBracketed:
if abs(alpha_c - alpha_t) < abs(alpha_s - alpha_t):
alpha_tt = alpha_c
else:
alpha_tt = alpha_s
else:
if abs(alpha_c - alpha_t) > abs(alpha_s - alpha_t):
alpha_tt = alpha_c
else:
alpha_tt = alpha_s
else:
if Verbose:
print """Case 4: lower function value and derivatives \
are of same sign, and magnitude of derivative did not decrease.
The minimum is NOT NECESSARILY bracketed. \
If bracketed, take cubic step, otherwise take alpha_min or alpha_max."""
#Minimizer of cubic that interpolates f_u, f_t, g_u, g_t
#Formula from Nocedal and Wright 2/e, Eq. 3.59, p 59
if isBracketed:
da = alpha_t - alpha_u
d1 = g_u + g_t + 3 * (f_u - f_t) / da
            s = max(abs(g_u), abs(g_t), abs(d1))
            d2 = s * ((d1 / s) ** 2 - (g_u / s * g_t / s)) ** 0.5
if alpha_t < alpha_u:
d2 = -d2
cubp = d2 - g_t + d1
cubq = ((d2 - g_t) + d2) + g_u
if abs(cubq) > _epsilon:
r = cubp / cubq
alpha_tt = alpha_t - r * da
else:
#print 'WARNING: VERY SMALL INTERVAL'
alpha_tt = 0.5 * (alpha_l + alpha_t) #Invalid, choose midpoint
else:
if alpha_t > alpha_l:
alpha_tt = alpha_max
else:
alpha_tt = alpha_min
isBounded = False #abs(g_t) > abs(g_l)
if Verbose: print 'Now proceeding to update'
#Update interval of uncertainty
if f_t > f_l:
if Verbose: print 'Update case 1'
alpha_u, f_u, g_u = alpha_t, f_t, g_t
else:
if sgnd < 0:
if Verbose: print 'Update case 2'
alpha_u, f_u, g_u = alpha_l, f_l, g_l
else:
if Verbose: print 'Update case 3'
alpha_l, f_l, g_l = alpha_t, f_t, g_t
"""Refine trial value if it lies outside [alpha_t, alpha_u] or
is otherwise too close to alpha_u."""
alpha_tt = min(alpha_max, alpha_tt)
alpha_tt = max(alpha_min, alpha_tt) #Update
alpha_t = alpha_tt
if isBracketed and isBounded:
if alpha_u > alpha_l:
alpha_t = min(alpha_l + delta * (alpha_u - alpha_l), alpha_t)
else:
alpha_t = max(alpha_l + delta * (alpha_u - alpha_l), alpha_t)
if Verbose: print 'Refined trial value = ', alpha_t
return isBracketed, alpha_t, alpha_l, alpha_u
def MoreThuenteLineSearch(f, df, x, p, df_x = None, f_x = None, args = (),
mu = 0.01, eta = 0.5, ftol = _epsilon, gtol = _epsilon, rtol = _epsilon,
maxiter = 9, fmin = 0.0, alpha_min0 = 0.001, alpha_max0 = 100.0, alpha_t = 1.0,
p5 = 0.5, xtrapf = 4.0, delta = 0.66, Verbose = False):
"""Implements the line search algorithm of Mor\'e and Thuente, ACM TOMS 1994
"Line search algorithms with guaranteed sufficient decrease"
doi: 10.1145/192115.192132
Inputs
------
f
df
x
p
df_x = None
f_x = None
args = ()
    mu = 0.01
        mu is the sufficient decrease (Armijo) parameter:
        f(x + alpha*p) <= f(x) + mu * alpha * df(x).p
        mu in (0,1)
    eta = 0.5
        eta is the curvature parameter:
        |df(x + alpha*p).p| <= eta * |df(x).p|
        eta in (0,1)
ftol = _epsilon
gtol = _epsilon
rtol = _epsilon
rtol is relative tolerance for acceptable step
maxiter: Maximum number of iterations. Default: 9
fmin = 0.0
alpha_min0 = 0.001
alpha_max0 = 100.0
alpha_t = 1.0
p5 = 0.5
xtrapf: An extrapolation parameter. Recommended: between [1.1, 4.0].
Default: 4.0
delta = 0.66
    Verbose = False
"""
##################################
# Evaluate function and derivative
##################################
    if f_x is None: f_0 = f(x, *args)
    else: f_0 = f_x
    if df_x is None: df_0 = df(x, *args)
    else: df_0 = df_x
g_0 = dot(df_0, p)
############################################################
# Check that initial gradient in search direction is downhill
#############################################################
    assert g_0.shape == (1, 1) or g_0.shape == (), 'Directional derivative has unexpected shape'
assert g_0 < 0, 'Attempted to linesearch uphill'
############
# Initialize
############
FoundGoodPoint = isBracketed = False
gtest = ftol * g_0
width = alpha_max0 - alpha_min0
width1 = width / p5
f_t = f(x + alpha_t * p, *args)
df_t = df(x + alpha_t * p, *args)
g_t = dot(df_t, p)
# alpha = step size
# f = function value
# g = gradient value in descent direction
# _l = best step so far
# _u = other endpoint of interval of uncertainty
# _t = current step iterate
alpha_l = alpha_u = 0.0
f_l = f_u = f_0
g_l = g_u = g_0
if alpha_l == alpha_min0: alpha_l += _epsilon
if alpha_u == alpha_max0: alpha_u -= _epsilon
assert alpha_min0 != alpha_max0
for k in range(maxiter):
if Verbose: print 'Iteration', k, ':', alpha_t, 'in (', alpha_l, ',', alpha_u, ')'
############################################
# Initialize current interval of uncertainty
############################################
if isBracketed:
alpha_min = min(alpha_l, alpha_u)
alpha_max = max(alpha_l, alpha_u)
else:
alpha_min = alpha_l
alpha_max = alpha_t + xtrapf * abs(alpha_t - alpha_l)
assert alpha_min <= alpha_max
##################################################
# Safeguard trial step within pre-specified bounds
##################################################
alpha_t = max(alpha_t, alpha_min0)
alpha_t = min(alpha_t, alpha_max0)
#If something funny happened, set it to lowest point obtained thus far
if ((isBracketed and (alpha_t <= alpha_min or alpha_t >= alpha_max)) \
or (isBracketed and (alpha_max - alpha_min <= rtol * alpha_max)) \
):
if Verbose: print 'Something funny happened'
return alpha_l
#Evaluate function and gradient at new point
f_t = f(x + alpha_t * p, *args)
df_t = df(x + alpha_t * p, *args)
g_t = dot(df_t, p)
ftest1 = f_0 + alpha_t * gtest
######################
# Test for convergence
######################
if (alpha_l == alpha_max0 and f_t <= ftest1 and g_t <= gtest):
if Verbose: print 'Stuck on upper bound'
break
if (alpha_l == alpha_min0 and f_t > ftest1 and g_t >= gtest):
if Verbose: print 'Stuck on lower bound'
break
#Check for a) sufficient decrease AND b) strong curvature criterion
if ((f_t <= f_0 + mu * g_0 * alpha_t) \
and abs(g_t) <= eta * abs(g_0)):
if Verbose: print 'Decrease criterion satisfied'
break
#Check for bad stuff
if isBracketed:
if (abs(alpha_max - alpha_min) >= delta * width1):
print 'Warning: Could not satisfy curvature conditions'
alpha_t = alpha_min + p5 * (alpha_max - alpha_min)
width1 = width
width = abs(alpha_max - alpha_min)
#If interval is bracketed to sufficient precision, break
if k > 0 and abs(alpha_l - alpha_u) < rtol:
if Verbose: print 'Interval bracketed: alpha = ', alpha_t
print 'Line search stuck. Emergency abort.'
break
########################################################
# Nocedal's modification of the More-Thuente line search
########################################################
ftest1 = f_0 + alpha_l * gtest
# Seek a step for which the modified function has a nonpositive value
# and a nonnegative derivative
if (not FoundGoodPoint) and f_t <= ftest1 and g_t >= g_0 * min(ftol, gtol):
FoundGoodPoint = True
# The modified function is used only if we don't have a step where the
# previous condition is attained, and if a lower function value has been
# obtained but it is not low enough
if (not FoundGoodPoint) and f_t <= f_l and f_t > ftest1:
if Verbose: print "Performing Nocedal's modification"
f_mt = f_t - alpha_t * gtest
f_ml = f_l - alpha_l * gtest
f_mu = f_u - alpha_u * gtest
g_mt = g_t - gtest
g_ml = g_l - gtest
g_mu = g_u - gtest
isBracketed, alpha_t, alpha_l, alpha_u = MoreThuenteLinesearchIterate\
(delta, alpha_l, alpha_u, alpha_t, alpha_min, alpha_max, f_ml, \
g_ml, f_mt, g_mt, f_mu, g_mu, isBracketed, Verbose)
f_l = f_ml + alpha_l * gtest
f_u = f_mu + alpha_u * gtest
g_l = g_ml + gtest
g_u = g_mu + gtest
else:
if Verbose: print 'Performing original More-Thuente line search'
isBracketed, alpha_t, alpha_l, alpha_u = MoreThuenteLinesearchIterate\
(delta, alpha_l, alpha_u, alpha_t, alpha_min, alpha_max, f_l, g_l, \
f_t, g_t, f_u, g_u, isBracketed, Verbose)
###########################
# Force sufficient decrease
###########################
if isBracketed:
if Verbose: print 'Force sufficient decrease'
if abs(alpha_u - alpha_l) >= delta * width1:
alpha_t = alpha_l + p5 * (alpha_u - alpha_l)
width1 = width
width = abs(alpha_u - alpha_l)
if Verbose: print k + 1, 'iterations in More-Thuente line search'
return alpha_t
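#Illustrative helper (not called by the search above): the acceptance test at
#each trial step alpha is the pair of strong Wolfe conditions,
#   f(x + alpha*p) <= f(x) + mu * alpha * df(x).p      (sufficient decrease)
#   |df(x + alpha*p).p| <= eta * |df(x).p|             (curvature)
#with 0 < mu < eta < 1. The function below simply restates that test explicitly.
def _satisfies_strong_wolfe(f, df, x, p, alpha, mu = 0.01, eta = 0.5, args = ()):
    "Check the strong Wolfe conditions for a trial step length alpha"
    f_0, g_0 = f(x, *args), dot(df(x, *args), p)
    f_a, g_a = f(x + alpha * p, *args), dot(df(x + alpha * p, *args), p)
    return f_a <= f_0 + mu * alpha * g_0 and abs(g_a) <= eta * abs(g_0)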
def CubicLineSearch(f, df, x, p, df_x, f_x = None, args = (),
alpha = 0.0001, beta = 0.9, eps = 0.0001, Verbose = False):
if f_x is None:
f_x = f(x, *args)
assert df_x.T.shape == p.shape
assert 0 < alpha < 1, 'Invalid value of alpha in backtracking linesearch'
assert 0 < beta < 1, 'Invalid value of beta in backtracking linesearch'
phi = lambda c: f(x + (c * p), *args)
phi0 = f_x
derphi0 = dot(df_x, p)
assert derphi0.shape == (1, 1) or derphi0.shape == ()
assert derphi0 < 0, 'Attempted to linesearch uphill'
stp = 1.0
#Loop until Armijo condition is satisfied
while phi(stp) > phi0 + alpha * stp * derphi0:
#Quadratic interpolant
        stp_q = -derphi0 * stp ** 2 / (2 * (phi(stp) - phi0 - derphi0 * stp))
        if phi(stp_q) <= f_x + alpha * stp_q * derphi0:
            return stp_q
#Quadratic interpolant bad, use cubic interpolant
A = zeros((2, 2))
b = zeros((2,))
A[0, 0] = stp * stp
A[0, 1] = -stp_q ** 2
A[1, 0] = -stp ** 3
A[1, 1] = stp_q ** 3
b[0] = phi(stp_q) - phi0 - derphi0 * stp_q
b[1] = phi(stp) - phi0 - derphi0 * stp
xx = dot(A, b) / ((stp * stp_q) ** 2 * (stp_q - stp))
stp_c = (-xx[1] + (xx[1] ** 2 - 3 * xx[0] * derphi0) ** 0.5) / (3 * xx[0])
#Safeguard: if new alpha too close to predecessor or too small
if abs(stp_c - stp) < 0.001 or stp_c < 0.001:
stp *= beta
else:
stp = stp_c
#print stp
#print stp
return stp
def BacktrackingLineSearch(f, df, x, p, df_x = None, f_x = None, args = (),
alpha = 0.0001, beta = 0.9, eps = _epsilon, Verbose = False):
"""
Backtracking linesearch
f: function
x: current point
p: direction of search
df_x: gradient at x
f_x = f(x) (Optional)
args: optional arguments to f (optional)
alpha, beta: backtracking parameters
eps: (Optional) quit if norm of step produced is less than this
Verbose: (Optional) Print lots of info about progress
Reference: Nocedal and Wright 2/e (2006), p. 37
Usage notes:
-----------
Recommended for Newton methods; less appropriate for quasi-Newton or conjugate gradients
"""
if f_x is None:
f_x = f(x, *args)
if df_x is None:
df_x = df(x, *args)
assert df_x.T.shape == p.shape
assert 0 < alpha < 1, 'Invalid value of alpha in backtracking linesearch'
assert 0 < beta < 1, 'Invalid value of beta in backtracking linesearch'
derphi = dot(df_x, p)
assert derphi.shape == (1, 1) or derphi.shape == ()
assert derphi < 0, 'Attempted to linesearch uphill'
stp = 1.0
fc = 0
len_p = norm(p)
#Loop until Armijo condition is satisfied
while f(x + stp * p, *args) > f_x + alpha * stp * derphi:
stp *= beta
fc += 1
if Verbose: print 'linesearch iteration', fc, ':', stp, f(x + stp * p, *args), f_x + alpha * stp * derphi
if stp * len_p < eps:
print 'Step is too small, stop'
break
#if Verbose: print 'linesearch iteration 0 :', stp, f_x, f_x
if Verbose: print 'linesearch done'
#print fc, 'iterations in linesearch'
return stp
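#Minimal usage sketch for BacktrackingLineSearch: search along the steepest
#descent direction of a simple quadratic. The function and starting point are
#invented purely for illustration.
def _demo_backtracking_linesearch():
    from numpy import array
    fq = lambda z: 0.5 * dot(z, z)      #f(z) = |z|^2 / 2
    dfq = lambda z: z                   #its gradient
    x0 = array([3.0, -4.0])
    p = -dfq(x0)                        #steepest descent direction
    stp = BacktrackingLineSearch(fq, dfq, x0, p, dfq(x0), fq(x0))
    print 'Accepted step length:', stp  #1.0 already satisfies the Armijo condition here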
#######################
# Trust region models #
#######################
class DogLeg:
def __init__(self, f, df, B, x, g, f_x = None, df_x = None, TrustRadius = 1.0):
self.TrustRadius = TrustRadius
self.f = f #Function
self.df = df #Gradient function
self.B = B #Approximate Hessian
self.x = x #Current point
self.g = g #Search direction
def solve(self):
g = self.g
B = self.B
        #Step 1: Find unconstrained (full quasi-Newton) solution
        pB = -solve(B, g)
#Step 2: Find Cauchy point
pU = -dot(g, g) / dot(g, dot(B, g)) * g
#INCOMPLETE
################
# Extrapolator #
################
class DIISExtrapolator:
def __init__(self, max = 300):
self.maxvectors = max
self.Reset()
def Reset(self):
self.errorvectors = []
self.coordvectors = []
def AddData(self, error, coord):
#Check max
        while self.GetNumVectors() - self.maxvectors >= 0:
            #Discard the oldest stored pair to make room
            self.errorvectors.pop(0)
            self.coordvectors.pop(0)
from numpy import asarray
#print asarray(error).flatten().shape, asarray(coord).flatten().shape
self.errorvectors.append(deepcopy(asarray(error).flatten()))
self.coordvectors.append(deepcopy(asarray(coord).flatten()))
def GetNumVectors(self):
assert len(self.errorvectors) == len(self.coordvectors)
return len(self.errorvectors)
def Extrapolate(self):
#Construct Overlap matrix (aka Gram matrix)
N = self.GetNumVectors()
if N == 0: return None
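        #Bordered (Lagrangian) DIIS system: the solution c minimizes
        #|sum_i c_i e_i|^2 subject to sum_i c_i = 1; the extra row and column
        #carry the constraint and the last unknown is the multiplier.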
B = zeros((N + 1, N + 1))
for i in range(N):
for j in range(N):
B[i, j] = dot(self.errorvectors[i], self.errorvectors[j])
B[N, :] = -1
B[:, N] = -1
B[N, N] = 0
v = zeros(N + 1)
v[N] = -1
#Solve for linear combination
xex = self.coordvectors[0] * 0
try:
c = solve(B, v)
assert c.shape == v.shape
#Generate interpolated coordinate
for i in range(N):
xex += c[i] * self.coordvectors[i]
        except (LinAlgError, AssertionError): #Linear dependency detected; trigger automatic reset
c = dot(pinv(B), v)
#Generate interpolated coordinate
for i in range(N):
xex += c[i] * self.coordvectors[i]
print 'Linear dependency detected in DIIS; resetting'
self.Reset()
return xex
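#Minimal usage sketch for DIISExtrapolator: the extrapolant is the combination
#sum_i c_i x_i whose combined error vector sum_i c_i e_i has minimum norm
#subject to sum_i c_i = 1. The toy error/coordinate pairs are invented for
#illustration only.
def _demo_diis():
    from numpy import array
    diis = DIISExtrapolator()
    diis.AddData(error = array([ 1.0, 0.0]), coord = array([0.0, 1.0]))
    diis.AddData(error = array([-1.0, 0.0]), coord = array([2.0, 1.0]))
    print 'DIIS extrapolant:', diis.Extrapolate()   #errors cancel; extrapolant is [1.0, 1.0]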
#######################
# Quasi-Newton driver #
#######################
class ApproxHessianBase:
def __init__(self, N = None):
if N is None:
self.M = None
else:
self.M = eye(N)
def Reset(self):
self.SetToIdentity()
def SetToIdentity(self):
self.M = eye(self.M.shape[0])
def SetToScaledIdentity(self, c = 1):
self.M = c * eye(self.M.shape[0])
def SetToMatrix(self, K):
self.M = K
def GetHessian(self):
return self.M
def Operate(self, g):
assert False, 'Should never run this'
def Update(self, *args):
assert False, 'Should never run this'
def __rshift__(self, g): #Overload >> operator
"""If self.B is defined, solves for x in B x = -g
"""
try:
dx = self.Operate(g)
except ValueError, e:
print e
print g.shape, self.M.shape
exit()
try:
return asarray_chkfinite(dx)
except (LinAlgError, ValueError):
            print 'Restarting approximate Hessian'
            #Trigger restart, fall back to steepest descent
            self.SetToIdentity()
            return -g
class ApproxHessian(ApproxHessianBase):
def __init__(self, N = None):
ApproxHessianBase.__init__(self, N)
def SetToNumericalHessian(self, df, x, step = 0.001, UseStencil = None):
if UseStencil is None:
import Stencil
UseStencil = Stencil.FirstOrderCentralDifferenceStencil()
#Initialize Hessian as numerical Hessian
self.M = UseStencil.ApplyToFunction(df, x, step)
def Operate(self, g):
"""If self.B is defined, solves for x in B x = -g
"""
try:
dx = -solve(self.M, g)
except LinAlgError:
print 'Warning, using pseudoinverse'
            dx = -dot(pinv(self.M), g)
return dx
class ApproxInvHessian(ApproxHessianBase):
def __init__(self, N = None):
ApproxHessianBase.__init__(self, N)
def SetToNumericalHessian(self, df, x, step = 0.001, UseStencil = None):
if UseStencil is None:
import Stencil
UseStencil = Stencil.FirstOrderCentralDifferenceStencil()
#Initialize Hessian as numerical Hessian
K = UseStencil.ApplyToFunction(df, x, step)
try:
self.M = inv(K)
except LinAlgError:
self.M = pinv(K)
def GetHessian(self):
try:
return inv(self.M)
except LinAlgError:
print 'Warning, using pseudoinverse'
return pinv(self.M)
def Operate(self, g):
"""If self.H is defined, returns x = H g
"""
return - dot(self.M, g)
class BFGSInvHessian(ApproxInvHessian):
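    #Standard BFGS update of the inverse Hessian approximation (Nocedal and
    #Wright 2/e, Ch. 6):
    #   H_new = (I - rho s y^T) H (I - rho y s^T) + rho s s^T,   rho = 1 / (y . s)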
def Update(self, s, y, s_dot_y = None):
if s_dot_y is None: s_dot_y = dot(s, y)
rhok = 1.0 / s_dot_y
I = eye(self.M.shape[0])
A1 = I - outer(s, y) * rhok
A2 = I - outer(y, s) * rhok
self.M = dot(A1, dot(self.M, A2))
        self.M += rhok * outer(s, s)
class LBFGSInvHessian(ApproxInvHessian):
def __init__(self, M = 20):
self.maxnumvecs = int(M)
self.rho = []
self.gamma = 1.0
self.Reset()
print 'Initialized L-BFGS Hessian approximation with M =', self.maxnumvecs
def Reset(self):
self.s = []
self.y = []
self.rho = []
def SetToIdentity(self): #Reset
self.Reset()
def SetToScaledIdentity(self, c = None): #Reset
self.Reset()
if c is not None: self.gamma = c
def Update(self, s, y, s_dot_y = None):
if len(self.s) == self.maxnumvecs:
self.s.pop(0)
self.y.pop(0)
self.rho.pop(0)
if s_dot_y is None: s_dot_y = dot(s, y)
rho = 1.0 / s_dot_y
if isinf(rho):
print 'Warning, s . y = 0; ignoring pair'
else:
self.s.append(s)
self.y.append(y)
self.rho.append(rho)
def GetNumVecs(self):
assert len(self.s) == len(self.y)
return len(self.s)
def __rshift__(self, g): #Overload >> as multiply -H g
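        #Two-loop recursion for the L-BFGS direction -H g using the stored
        #(s, y) pairs (Nocedal and Wright 2/e, Algorithm 7.4)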
q = g
m = self.GetNumVecs()
s = self.s
y = self.y
a = zeros(m)
for i in range(m - 1, -1, -1):
a[i] = self.rho[i] * dot(s[i], q)
q = q - a[i] * y[i]
#r = Hguess * q
gamma = self.gamma
if m > 0: gamma = dot(s[-1], y[-1]) / dot(y[-1], y[-1])
#if m > 0: gamma = 0.5 * gamma + 0.5 * dot(s[-1], y[-1]) / dot(y[-1], y[-1])
self.gamma = gamma
print 'gamma=',gamma
r = gamma * q
for i in range(m):
b = self.rho[i] * dot(y[i], r)
r = r + s[i] * (a[i] - b)
return - r
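#Minimal usage sketch for LBFGSInvHessian: with a single curvature pair from the
#quadratic f(x) = |x|^2 / 2 (so y = s exactly and gamma = 1), the two-loop
#recursion reproduces the exact Newton direction -g. The numbers are invented
#for illustration only.
def _demo_lbfgs_direction():
    from numpy import array
    H = LBFGSInvHessian(M = 5)
    s = array([1.0, -2.0])                    #step taken
    y = array([1.0, -2.0])                    #change in gradient; equals s for this quadratic
    H.Update(s, y)
    g = array([0.5, 0.5])                     #current gradient
    print 'Quasi-Newton direction:', H >> g   #expect -g = [-0.5, -0.5]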
def Optimizer(f, x0, df = None, xtol = 0.1*_epsilon**0.5,
gtol = 0.1*_epsilon**0.5, maxstep = 1, maxiter = None,
line_search = BacktrackingLineSearch, DIISTrigger = 0):
#Hijack code with call to L-BFGS-B
#from scipy.optimize import fmin_l_bfgs_b
#return fmin_l_bfgs_b(f, x0, df, m = len(x0), pgtol = gtol, factr = 1.0/xtol, iprint=1)[0]
#Do LBFGS
#If true, will fall back on BFGS automatically
DoLBFGS = True
DoLineSearch = True
TrustRadius = 0.5
MaxTrustRadius = 10.0
#Cosine squared of uphill angle
UphillAngleThresh = 0.0
UphillFThresh = 0.001
#Massage x0
x0 = asarray_chkfinite(x0).ravel()
if x0.ndim == 0: x0.shape = (1,)
#Set default number of maxiters
if maxiter is None: maxiter = max(20000, len(x0) * 4)
###########
#Initialize
###########
x = x0
f_x, df_x = f(x0), df(x0)
norm_g = norm(df_x)
N = len(x0)
if DoLBFGS:
#Dimension of 3 --- 20 is normal
#H = LBFGSInvHessian(2 + N ** 0.2)
H = LBFGSInvHessian(N)
    else:
        H = BFGSInvHessian(N)
        H.SetToNumericalHessian(df, x)
if DIISTrigger != 0: DIIS = DIISExtrapolator()
for k in range(maxiter):
        #######################################################################
        # Solve for unconstrained minimization direction p such that B p = -g #
        #######################################################################
p = H >> df_x #overloaded
AcceptStep = True
##########################################################
# Safeguard: perform linesearch or trust-region limiting #
# to determine actual step s to take #
##########################################################
if DoLineSearch:
####################
# Perform linesearch
####################
try:
alpha = line_search(f, df, x, p, df_x, f_x)
assert alpha > 0
except AssertionError:
print 'Line search failed; falling back to exact line search'
alpha = MinpackLineSearch(f, df, x, p, df_x, f_x)
assert alpha > 0
s = alpha * p
norm_s = norm(s)
else:
##############
# Do dog-leg #
##############
#First do line search along steepest descent direction
alpha = line_search(f, df, x, -df_x, df_x, f_x)
#Cauchy point is x - alpha * df_x
#Next, move toward unconstrained solution from Cauchy point
dogleg = p + alpha * df_x
alpha2 = line_search(f, df, x - alpha * df_x, dogleg)
s = -alpha * df_x + alpha2 * dogleg
#Stupidly enforce simple trust radius
norm_s = norm(s)
while norm_s > TrustRadius:
alpha2 *= 0.9
s = -alpha * df_x + alpha2 * dogleg
norm_s = norm(s)
######################################
# Do DIIS Extrapolation if requested #
######################################
DidDIIS = False
if DIISTrigger != 0:
data = zeros(len(df_x) + 1)
data[1:] = df_x
data[0] = f_x
DIIS.AddData(error = data, coord = x)
if DIIS.GetNumVectors() % DIISTrigger == 0:
mindf = min([norm(xx[1:]) for xx in DIIS.errorvectors])
xdnew = DIIS.Extrapolate()
norm_df_xdnew = norm(df(xdnew))
f_xdnew = f(xdnew)
if norm_df_xdnew < mindf and f_xdnew < f_x:
#Accept only if there is actually a reduction in |df|
print 'Accepted DIIS extrapolation'
DidDIIS = True
s = xdnew - x
#if DIISTrigger > 1: DIISTrigger -= 1
#print 'Improvement ratios', (f_x - f_xdnew) / f_x, (mindf - norm_df_xdnew) / mindf
if (f_x - f_xdnew) < 0.000001 * f_x and (mindf - norm_df_xdnew) < 0.00001 * mindf :
print 'DIIS reaching diminishing returns, resetting'
DIIS.Reset()
#DIISTrigger += 1
else:
print 'DIIS wanted to go to', f_xdnew, norm_df_xdnew
print "Rejected DIIS extrapolation"
DIIS.Reset()
DIISTrigger += 1
AcceptStep = False
if DIISTrigger > 50:
print 'Turning off DIIS'
DIISTrigger = 0
####################################################
# Take step; compute function value and derivative #
####################################################
xnew = x + s
f_xnew = f(xnew)
df_xnew = df(xnew)
y = df_xnew - df_x
norm_y = norm(y)
norm_gnew = norm(df_xnew)
s_y = dot(s, y)
        #############################################################
        # Sanity check: reject the step and reset if f(x) increased #
        #############################################################
if True:
ReductionRatio = 2 * (f_xnew - f_x) / dot(s, df_x)
print '\nReduction ratio:', ReductionRatio
if ReductionRatio <= 0:
print 'Iterations are producing a WORSE answer!\nBailing.'
print 'Rejecting step and restarting BFGS'
AcceptStep = False
if DoLBFGS:
H.Reset()
H.Update(s, y)
else:
gamma = s_y / norm_y ** 2
H.SetToScaledIdentity(gamma)
if not DoLineSearch: #ie do trust region
#Compute model quality?
ReductionRatio = 2 * (f_xnew - f_x) / dot(s, df_x)
print '\nReduction ratio:', ReductionRatio
if ReductionRatio > 0.75:
if norm(s) == TrustRadius:
TrustRadius = min (2 * TrustRadius, MaxTrustRadius)
elif ReductionRatio < 0.25:
TrustRadius *= 0.25
###################
# Check convergence
###################
if norm_g < gtol:
break
#################################
# Make sure we are going downhill
#################################
NegCosDescentAngle = s_y / (norm_s * norm_y)
DescentAngle = arccos(-NegCosDescentAngle)
DescentRatio = (f_x - f_xnew) / f_x
isGoingUphill = NegCosDescentAngle < UphillAngleThresh
isIncreasing = -DescentRatio > UphillFThresh
if isGoingUphill or isIncreasing:
if isGoingUphill:
print '\nIteration %d: WARNING: Going uphill with angle %f' % \
(k, -DescentAngle)
if isIncreasing:
print '\nIteration %d: WARNING: function increased by ratio %f' % \
(k, -DescentRatio)
print 'Rejecting step and restarting BFGS'
AcceptStep = False
if DoLBFGS:
H.Reset()
H.Update(s, y)
else:
gamma = s_y / norm_y ** 2
H.SetToScaledIdentity(gamma)
else:
#####################################
# Accept step, update quasi-Hessian #
#####################################
if not DidDIIS:
H.Update(s, y)
#####################################################
# EXPERIMENTAL: Heuristics for switching algorithms #
#####################################################
if False and DoLineSearch and DescentRatio < 1e-2:
print '\nTurning off linesearch'
DoLineSearch = False
if False and DescentRatio < 1e-2 and DIISTrigger==0:
print 'Do DIIS'
DIISTrigger = 2
DIIS = DIISExtrapolator()
#print 'Switching to BFGS'
#H = BFGSInvHessian(N)
#gamma = dot(s, y) / dot(y, y)
#H.SetToScaledIdentity(gamma)
###################################################
# Accept step: update function value and gradient #
###################################################
if AcceptStep:
x = xnew
df_x = df_xnew
f_x = f_xnew
norm_g = norm_gnew
print "Iteration %d: f(x) = %f (%fx), ||f'(x)|| = %f, || dx || = %f, descent angle = %f" \
% (k, f_x, DescentRatio, norm_g, norm_s, DescentAngle)
if k == maxiter - 1: print 'Maximum number of iterations reached'
print 'Quasi-Newton done'
"""
W = ApproxHessian()
W.SetToNumericalHessian(df, x)
Hess = W.GetHessian()
from numpy.linalg import svd
for eigval in svd(Hess)[1]:
print eigval
"""
    return x
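#Illustrative driver: minimize a small convex quadratic with the quasi-Newton
#driver above, assuming the default L-BFGS path (which does not need the
#external Stencil module). The matrix and starting point are invented for
#illustration only.
if __name__ == '__main__':
    from numpy import array
    A = array([[3.0, 1.0], [1.0, 2.0]])    #symmetric positive definite
    fq = lambda z: 0.5 * dot(z, dot(A, z))
    dfq = lambda z: dot(A, z)
    xmin = Optimizer(fq, array([4.0, -3.0]), df = dfq)
    print 'Minimizer found:', xmin         #expect approximately [0, 0]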