L-BFGS algorithm http://aria42.com/blog/2014/12/understanding-lbfgs/
#!/usr/bin/env python
''' L-BFGS algorithm http://aria42.com/blog/2014/12/understanding-lbfgs/ '''
import numpy as np
from numpy import linalg
from collections import deque
class LineSearch:
    ''' backtracking line search algorithm '''

    def __init__(self, alpha):
        self.alpha = alpha      # step-shrink factor, 0 < alpha < 1
        self.beta = 0.01        # sufficient-decrease parameter
        self.stepLenThresh = 1e-10

    def lineSearch(self, f, xs, dr, verbose=False):
        f0, grad = f(xs)
        if verbose:
            print('L2(grad): {}'.format(linalg.norm(grad, 2)))
        if linalg.norm(grad, 2) < self.stepLenThresh:
            # already at minimum
            if verbose:
                print('lineSearch: Already at minimum {}'.format(
                    linalg.norm(grad, 2)))
            return 0, 0
        delta = self.beta * np.dot(grad, dr)
        stepLen = 1.0
        while stepLen > self.stepLenThresh:
            fnVal, _ = f(xs + stepLen * dr)
            if verbose:
                print('lineSearch: {}, curVal: {:.5f}, trgVal: {:.5f}'.format(
                    self.alpha, fnVal, f0 + stepLen * delta))
            # Armijo-Goldstein condition
            if fnVal <= f0 + stepLen * delta:
                return stepLen, fnVal
            stepLen *= self.alpha
        raise Exception("Step-size underflow")
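
# A minimal usage sketch (not part of the original gist; the helper name
# _demoLineSearch and the starting point are assumptions): backtracking on
# f(x) = x^2 from x = 3 along the steepest-descent direction. The unit step
# overshoots to x = -3, so it is halved until the Armijo condition
# f(x + t*dr) <= f(x) + beta * t * (grad . dr) holds.
def _demoLineSearch():
    def xSquared(xs):
        return xs[0] * xs[0], np.array([2. * xs[0]])
    stepLen, fnVal = LineSearch(0.5).lineSearch(
        xSquared, np.array([3.]), np.array([-6.]), verbose=True)
    return stepLen, fnVal  # (0.5, 0.0): the halved step lands exactly at 0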
def newtonStep(f, x, l, hinv):
    ''' x_{n} -> x_{n+1} '''
    _, grad = f(x)
    # \Delta x = - \invhessian_n \grad_n
    dr = -hinv(grad)
    # \alpha = \min_{\alpha \geq 0} f(x_{n} - \alpha d)
    alpha, _ = l.lineSearch(f, x, dr)
    # x_{n+1} = x_{n} - \alpha d
    return x + alpha * dr


def newtonMinimize(f, update, initGuess,
                   maxIters=0, tolerance=0, alpha=0.5,
                   verbose=False):
    x = initGuess
    i = 0
    while maxIters == 0 or i <= maxIters:
        i += 1
        # search and step
        fx, _ = f(x)
        xnew = newtonStep(f, x, LineSearch(alpha), update(x))
        fxnew, gradnew = f(xnew)
        assert fxnew <= fx, ('newtonStep did not minimize: '
                             '{:.5f} -> {:.5f}').format(fx, fxnew)
        # guard the denominator so an exact zero minimum does not divide by 0
        reldiff = np.abs(fx - fxnew) / max(np.abs(fxnew), 1e-12)
        x = xnew
        if verbose:
            print('Iteration {}: began with {}, ended with value {}'.format(
                i, fx, fxnew))
            print('Iteration {}: at x={}'.format(i, x))
            print('Iteration {}: gradient with {} and relDiff {}'.format(
                i, linalg.norm(gradnew, 2), reldiff))
        if (reldiff <= tolerance or
                linalg.norm(gradnew, 2) <= tolerance):
            break
    return x
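
# Sketch (illustrative; the names below are not in the original gist): with
# the exact inverse Hessian, a single Newton step minimizes a quadratic
# f(x) = 0.5 x^T A x in one shot, since dr = -A^{-1} grad points straight
# at the minimum and the full step length 1.0 passes the Armijo test.
def _demoExactNewton():
    A = np.array([[3., 0.], [0., 1.]])
    def quadratic(xs):
        return 0.5 * np.dot(xs, np.dot(A, xs)), np.dot(A, xs)
    x1 = newtonStep(quadratic, np.array([2., -1.]), LineSearch(0.5),
                    lambda grad: linalg.solve(A, grad))
    return x1  # [0., 0.] after a single full step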
class GradientDescent:
    def __call__(self, x):
        def hinv(dr):
            ''' hessian is identity matrix, just use grad as direction '''
            return dr
        return hinv


class HistoryEntry:
    def __init__(self, xdelta, graddelta):
        self.xdelta = xdelta        # s_n = x_{n+1} - x_n
        self.graddelta = graddelta  # y_n = grad_{n+1} - grad_n
        self.curvature = np.dot(xdelta, graddelta)
        assert self.curvature >= 0.0, "Negative Curvature: {:.5f}".format(
            self.curvature)
class LBFGS:
    def __init__(self, f, maxHistory):
        self.f = f
        self.history = deque([], maxHistory)
        self.lastx = None
        self.lastgrad = None

    def __call__(self, x):
        if self.lastx is None:
            gamma = 1.0
        else:
            # input and gradient deltas
            _, grad = self.f(x)
            xdelta = x - self.lastx
            graddelta = grad - self.lastgrad
            # store
            entry = HistoryEntry(xdelta, graddelta)
            gamma = entry.curvature / np.dot(graddelta, graddelta)
            self.history.append(entry)
        self.lastgrad = self.f(x)[1] if self.lastx is None else grad
        self.lastx = x

        def hinv(dr):
            r''' \invhessian_n d via the two-loop recursion '''
            result = dr.copy()
            alphas = []
            # backward pass
            for entry in reversed(self.history):
                alpha = np.dot(entry.xdelta, result) / entry.curvature
                alphas.append(alpha)
                result -= alpha * entry.graddelta
            # scale by the initial inverse Hessian H_0 = gamma * I with
            # gamma = (s . y) / (y . y); the original multiplied by 1/gamma,
            # which inverts the standard scaling
            result *= gamma
            # forward pass
            for entry in self.history:
                alpha = alphas.pop()
                beta = np.dot(entry.graddelta, result) / entry.curvature
                result += (alpha - beta) * entry.xdelta
            return result
        return hinv
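
# Sanity-check sketch (not in the original gist; _checkSecant and its inputs
# are illustrative): after one (xdelta, graddelta) update, the two-loop
# recursion should satisfy the secant condition hinv(graddelta) == xdelta,
# regardless of the initial scaling gamma.
def _checkSecant():
    def xSquared(xs):
        return xs[0] * xs[0], np.array([2. * xs[0]])
    lbfgs = LBFGS(xSquared, 2)
    lbfgs(np.array([1.]))            # first call only seeds lastx / lastgrad
    hinv = lbfgs(np.array([0.5]))    # second call records one history entry
    s, y = np.array([-0.5]), np.array([-1.])  # xdelta, graddelta
    assert np.allclose(hinv(y), s)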
def test():
    # x^2
    def xSquared(xs):
        val = xs[0] * xs[0]
        grad = np.array([2. * xs[0]])
        return val, grad

    # (x-1)^4 + (y+2)^4
    def quarticFn(xs):
        x, y = xs[0], xs[1]
        val = np.power(x - 1, 4) + np.power(y + 2, 4)
        grad = np.array([4. * np.power(x - 1, 3), 4. * np.power(y + 2, 3)])
        return val, grad

    # TestLineSearch
    ls = LineSearch(0.5)
    stepLen, fnVal = ls.lineSearch(xSquared, np.array([1.]), np.array([-1.]))
    assert stepLen == 1.0 and fnVal == 0.0, "minimize along a direction"
    stepLen, fnVal = ls.lineSearch(xSquared, np.array([0.]), np.array([1.]))
    assert stepLen == 0.0 and fnVal == 0.0, "already at minimum"

    # TestGradientDescent
    xmin = newtonMinimize(xSquared,
                          update=GradientDescent(),
                          initGuess=np.array([1.0]))
    assert xmin[0] == 0.0
    xmin = newtonMinimize(quarticFn,
                          update=GradientDescent(),
                          initGuess=np.array([0.0, 0.0]),
                          tolerance=1e-5,
                          verbose=False)
    minFx, _ = quarticFn(xmin)
    assert abs(minFx) < 1e-4, minFx

    # TestLBFGS
    xmin = newtonMinimize(xSquared,
                          update=LBFGS(xSquared, 2),
                          initGuess=np.array([1.0]))
    assert xmin[0] == 0.0
    xmin = newtonMinimize(quarticFn,
                          update=LBFGS(quarticFn, 2),
                          initGuess=np.array([0.0, 0.0]),
                          tolerance=1e-5, verbose=True)
    minFx, _ = quarticFn(xmin)
    assert abs(minFx) < 1e-4, minFx


if __name__ == '__main__':
    test()