@Edenhofer
Created March 16, 2023 18:31
JAX enters an infinite loop for trust-ncg minimization
# %%
from functools import partial
from typing import NamedTuple, Optional, Tuple, Union

import jax
from jax import lax
from jax import numpy as jnp

N_RESET = 20


class CGResults(NamedTuple):
    x: jnp.ndarray
    nit: Union[int, jnp.ndarray]
    nfev: Union[int, jnp.ndarray]  # number of matrix-evaluations
    info: Union[int, jnp.ndarray]
    success: Union[bool, jnp.ndarray]


# The following is code adapted from Nicholas Mancuso to work with pytrees
class _QuadSubproblemResult(NamedTuple):
    step: jnp.ndarray
    hits_boundary: Union[bool, jnp.ndarray]
    pred_f: Union[float, jnp.ndarray]
    nit: Union[int, jnp.ndarray]
    nfev: Union[int, jnp.ndarray]
    njev: Union[int, jnp.ndarray]
    nhev: Union[int, jnp.ndarray]
    success: Union[bool, jnp.ndarray]


class _CGSteihaugState(NamedTuple):
    z: jnp.ndarray
    r: jnp.ndarray
    d: jnp.ndarray
    step: jnp.ndarray
    energy: Union[None, float, jnp.ndarray]
    hits_boundary: Union[bool, jnp.ndarray]
    done: Union[bool, jnp.ndarray]
    nit: Union[int, jnp.ndarray]
    nhev: Union[int, jnp.ndarray]


def second_order_approx(
    p: jnp.ndarray,
    cur_val: Union[float, jnp.ndarray],
    g: jnp.ndarray,
    hessp_at_xk,
) -> Union[float, jnp.ndarray]:
    # local quadratic model m(p) = f(x_k) + g^T p + 0.5 * p^T B p
    return cur_val + jnp.vdot(g, p) + 0.5 * jnp.vdot(p, hessp_at_xk(p))


def get_boundaries_intersections(
    z: jnp.ndarray, d: jnp.ndarray, trust_radius: Union[float, jnp.ndarray]
):
    # solve ||z + t * d|| == trust_radius for t and return both roots in
    # ascending order
    a = jnp.vdot(d, d)
    b = 2 * jnp.vdot(z, d)
    c = jnp.vdot(z, z) - trust_radius**2
    sqrt_discriminant = jnp.sqrt(b * b - 4 * a * c)
    aux = b + jnp.copysign(sqrt_discriminant, b)
    ta = -aux / (2 * a)
    tb = -2 * c / aux
    ra = jnp.where(ta < tb, ta, tb)
    rb = jnp.where(ta < tb, tb, ta)
    return (ra, rb)

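
# Hypothetical sanity check (not part of the original gist): starting from the
# origin with a unit direction, the sphere ||z + t * d|| == trust_radius is
# crossed at t = -trust_radius and t = +trust_radius.
_ta, _tb = get_boundaries_intersections(jnp.zeros(2), jnp.array([1., 0.]), 1.)
assert _ta == -1. and _tb == 1.
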

def _cg_steihaug_subproblem(
    cur_val: Union[float, jnp.ndarray],
    g: jnp.ndarray,
    hessp_at_xk,
    *,
    trust_radius: Union[float, jnp.ndarray],
    tr_norm_ord: Union[None, int, float, jnp.ndarray] = None,
    resnorm: Optional[float],
    absdelta: Optional[float] = None,
    norm_ord: Union[None, int, float, jnp.ndarray] = None,
    miniter: Union[None, int] = None,
    maxiter: Union[None, int] = None,
) -> _QuadSubproblemResult:
    tr_norm_ord = jnp.inf if tr_norm_ord is None else tr_norm_ord  # taken from JAX
    norm_ord = 2 if norm_ord is None else norm_ord  # TODO: change to 1
    maxiter_fallback = 20 * g.size  # taken from SciPy's NewtonCG minimizer
    miniter = jnp.minimum(
        6, maxiter if maxiter is not None else maxiter_fallback
    ) if miniter is None else miniter
    maxiter = jnp.maximum(
        jnp.minimum(200, maxiter_fallback), miniter
    ) if maxiter is None else maxiter

    common_dtp = g.dtype
    eps = 6. * jnp.finfo(common_dtp).eps

    # second-order Taylor series approximation at the current values, gradient,
    # and Hessian
    soa = partial(
        second_order_approx, cur_val=cur_val, g=g, hessp_at_xk=hessp_at_xk
    )

    # helpers for internal switches in the main CGSteihaug logic
    def noop(
        param: Tuple[_CGSteihaugState, Union[float, jnp.ndarray]]
    ) -> _CGSteihaugState:
        iterp, z_next = param
        return iterp

    def step1(
        param: Tuple[_CGSteihaugState, Union[float, jnp.ndarray]]
    ) -> _CGSteihaugState:
        iterp, z_next = param
        z, d, nhev = iterp.z, iterp.d, iterp.nhev
        ta, tb = get_boundaries_intersections(z, d, trust_radius)
        pa = z + ta * d
        pb = z + tb * d
        p_boundary = jnp.where(soa(pa) < soa(pb), pa, pb)
        return iterp._replace(
            step=p_boundary, nhev=nhev + 2, hits_boundary=True, done=True
        )

    def step2(
        param: Tuple[_CGSteihaugState, Union[float, jnp.ndarray]]
    ) -> _CGSteihaugState:
        iterp, z_next = param
        z, d = iterp.z, iterp.d
        ta, tb = get_boundaries_intersections(z, d, trust_radius)
        p_boundary = z + tb * d
        return iterp._replace(step=p_boundary, hits_boundary=True, done=True)

    def step3(
        param: Tuple[_CGSteihaugState, Union[float, jnp.ndarray]]
    ) -> _CGSteihaugState:
        iterp, z_next = param
        return iterp._replace(step=z_next, hits_boundary=False, done=True)

    # initialize the step
    p_origin = jnp.zeros_like(g)

    # init the state for the first iteration
    z = p_origin
    r = g
    d = -r
    energy = 0.
    init_param = _CGSteihaugState(
        z=z,
        r=r,
        d=d,
        step=p_origin,
        energy=energy,
        hits_boundary=False,
        done=maxiter == 0,
        nit=0,
        nhev=0
    )

    # Search for the min of the approximation of the objective function.
    def body_f(iterp: _CGSteihaugState) -> _CGSteihaugState:
        z, r, d = iterp.z, iterp.r, iterp.d
        energy, nit = iterp.energy, iterp.nit
        nit += 1

        jax.debug.print("in body {nit} \\\\ 1 ::", nit=nit)
        Bd = hessp_at_xk(d)
        dBd = jnp.vdot(d, Bd)

        r_squared = jnp.vdot(r, r)
        alpha = r_squared / dBd
        z_next = z + alpha * d

        r_next = r + alpha * Bd
        r_next_squared = jnp.vdot(r_next, r_next)
        beta_next = r_next_squared / r_squared
        d_next = -r_next + beta_next * d

        jax.debug.print("in body {nit} \\\\ 2 ::", nit=nit)
        accept_z_next = nit >= maxiter
        jax.debug.print(
            "in body {nit} \\\\ 3 :: accept_z_next={accept_z_next}",
            nit=nit,
            accept_z_next=accept_z_next
        )
        if norm_ord == 2:
            r_next_norm = jnp.sqrt(r_next_squared)
        else:
            r_next_norm = jnp.linalg.norm(r_next, ord=norm_ord)
        accept_z_next |= r_next_norm < resnorm
        # Relative to a plain CG, `z_next` is negative
        energy_next = jnp.vdot((r_next + g) / 2, z_next)
        energy_diff = energy - energy_next
        if absdelta is not None:
            neg_energy_eps = -eps * jnp.abs(energy)
            accept_z_next |= (energy_diff >= neg_energy_eps
                             ) & (energy_diff < absdelta) & (nit >= miniter)

        jax.debug.print("in body {nit} \\\\ 4 ::", nit=nit)
        # include a junk switch to catch the case where none should be executed
        z_next_norm = jnp.linalg.norm(z_next, ord=tr_norm_ord)
        jax.debug.print("in body {nit} \\\\ 5 :: pre-index", nit=nit)
        index = jnp.argmax(
            jnp.array(
                [False, dBd <= 0, z_next_norm >= trust_radius, accept_z_next]
            )
        )
        jax.debug.print(
            "in body {nit} \\\\ 6 :: pre-switch {index}", nit=nit, index=index
        )
        iterp = lax.switch(index, [noop, step1, step2, step3], (iterp, z_next))
        jax.debug.print("in body {nit} \\\\ 7 :: post-switch", nit=nit)

        iterp = iterp._replace(
            z=z_next,
            r=r_next,
            d=d_next,
            energy=energy_next,
            nhev=iterp.nhev + 1,
            nit=nit
        )

        return iterp

    def cond_f(iterp: _CGSteihaugState) -> bool:
        jax.debug.print(
            "cond_f={c} maxiter={maxiter}", c=~iterp.done, maxiter=maxiter
        )
        return jnp.logical_not(iterp.done)

    # perform inner optimization to solve the constrained
    # quadratic subproblem using cg
    jax.debug.print("looped {result.done} {result}", result=init_param)
    result = lax.while_loop(cond_f, body_f, init_param)
    jax.debug.print("looped {result.done} {result}", result=result)

    pred_f = soa(result.step)
    result = _QuadSubproblemResult(
        step=result.step,
        hits_boundary=result.hits_boundary,
        pred_f=pred_f,
        nit=result.nit,
        nfev=0,
        njev=0,
        nhev=result.nhev + 1,
        success=True
    )

    return result


def rosenbrock(np):
    def func(x):
        return jnp.sum(100. * jnp.diff(x)**2 + (1. - x[:-1])**2)

    return func


def himmelblau(np):
    def func(p):
        x, y = p
        return (x**2 + y - 11.)**2 + (x + y**2 - 7.)**2

    return func


def matyas(np):
    def func(p):
        x, y = p
        return 0.26 * (x**2 + y**2) - 0.48 * x * y

    return func


def eggholder(np):
    def func(p):
        x, y = p
        return -(y + 47.) * jnp.sin(
            jnp.sqrt(jnp.abs(x / 2. + y + 47.))
        ) - x * jnp.sin(jnp.sqrt(jnp.abs(x - (y + 47.))))

    return func


def hessp(primals, tangents):
    # Hessian-vector product via a jvp of the gradient (forward-over-reverse)
    return jax.jvp(jax.grad(fun), (primals, ), (tangents, ))[1]

fun = eggholder(jnp)
x0 = jnp.ones(2) * 100.
f0, g0 = jax.value_and_grad(fun)(x0)
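
# Hypothetical sanity check (not part of the original gist): the Hessian-vector
# product helper should match contracting the full Hessian, materialized via
# jax.hessian, with a tangent vector.
_v = jnp.array([1., 0.])
assert jnp.allclose(hessp(x0, _v), jax.hessian(fun)(x0) @ _v)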

kwargs = {
    "absdelta": 0.,
    "resnorm": 0.,
    "trust_radius": 1.,
    "norm_ord": 1,
}
_cg_steihaug_subproblem(f0, g0, partial(hessp, x0), **kwargs)
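
# A hypothetical variant (not part of the original gist): issue the same
# subproblem call for the convex Matyas objective to check whether the hang is
# specific to the eggholder function.
fun_m = matyas(jnp)
x0_m = jnp.ones(2) * 100.
f0_m, g0_m = jax.value_and_grad(fun_m)(x0_m)


def hessp_m(primals, tangents):
    # Hessian-vector product of the Matyas objective, mirroring `hessp` above
    return jax.jvp(jax.grad(fun_m), (primals, ), (tangents, ))[1]


_cg_steihaug_subproblem(f0_m, g0_m, partial(hessp_m, x0_m), **kwargs)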