
ABXC nonsmooth optimization problems

ABXC is a family of nonsmooth optimization problems from Curtis et al.; see the problem description and links in abxc.py below. They're hard to optimize, very noisy near minima, and some are infeasible. Here's a plot of scipy.optimize's trust-constr creeping up an infeasible slope:

[plot 20nov-plot-abxc: trust-constr iterates on an infeasible ABXC problem]

Hard limits

If I understand the plots in the paper (correct me), the constraints rho( A + B X C ) <= 1 are not hard: they are often violated. In fact, satisfying them is impossible if A is too big -- consider lines y = a + bc x in the plane. I'm more interested in problems with hard limits, so I ran with a smaller A:

A = np.random.normal( size=(n,n), scale=ascale )  # ascale, standard deviation, ~ 0.2
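To make "too big" precise (my addition, using the fact that radius in abxc.py below is the max singular value): B X C has rank at most k = min( nr, nc ), so by Eckart-Young, radius( A + B X C ) >= sigma_{k+1}( A ) for every X. If that singular value of A exceeds 1, the constraint is infeasible no matter what the optimizer does:

    import numpy as np

    n, nr, nc, ascale = 6, 2, 3, 1.0            # sizes as in abxc.py below
    A = np.random.normal( size=(n, n), scale=ascale )
    k = min( nr, nc )                           # rank( B X C ) <= k
    sig = np.linalg.svd( A, compute_uv=False )  # singular values, descending
    print( "radius( A + B X C ) >= sigma_%d( A ) = %.3g for any X" % (k + 1, sig[k]) )

With ascale = 1 and n = 6, sigma_3( A ) is typically well above 1, hence the smaller ascale.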

(In general, there are "interior methods" and "exterior methods": start inside (feasible) and stay inside, or take outside (infeasible) paths that end up feasible. "SQP ... will usually violate nonlinear constraints until convergence, often by large amounts" -- is that right? Comments welcome.)

Startpoint 0 ?

Instead of starting ABXC optimizations at x0 = 0, I get better results starting at the
least-squares argmin over X of |A + B X C| with X flattened. This minimizes the rather different Frobenius norm, but it's fast and easy, just a few lines of code; see the sketch below.
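A minimal sketch of that start point (mine, not from the paper): with numpy's row-major ravel, ( B.dot(X).dot(C) ).ravel() equals np.kron( B, C.T ).dot( X.ravel() ), so the Frobenius-norm minimizer is one ordinary least-squares solve:

    import numpy as np

    def lstsq_x0( abcrec ):
        """ -> flat X minimizing |A + B X C|_Frobenius, an optimizer start point """
        A, B, C = abcrec.A, abcrec.B, abcrec.C
        K = np.kron( B, C.T )                   # (B X C).ravel() == K . X.ravel()
        return np.linalg.lstsq( K, - A.ravel() )[0]

Here one would presumably fit a constraint triple A0 B0 C0, so that the start point is at least nearly feasible.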

Of course, with many local minima, any claimed improvement of any method on any problem may be noise.

Comments welcome, more test cases most welcome

cheers
— denis-bz-py at t-online dot de

Last change: 2018-11-23 Nov

#!/usr/bin/env python2
"""ABXC test problem for nonlinear optimization
after Curtis, Mitchell, Overton,
"A BFGS-SQP method for nonsmooth, nonconvex, constrained optimization
and its evaluation using relative minimization profiles"
2016, 34 pages
http://dx.doi.org/10.1080/10556788.2016.1208749
http://coral.ise.lehigh.edu/frankecurtis/files/papers/CurtMitcOver17.pdf
http://www.timmitchell.com/software/PSARNOT -- matlab code and input .mat
The problem:
--------------------------------------------------------------------------------
define f(X) = radius( A + B X C ), here the max singular value (spectral norm)
with matrices e.g. 6 x 6, 6 x 2, 2 x 3, 3 x 6
Generate sets of random-normal Ai Bi Ci, the Ai with standard deviation `ascale`
Maximize radius( A1 + B1 X C1 ) over all X, e.g. 2 x 3
constraining radius( A0 + B0 X C0 ) <= 1.
--------------------------------------------------------------------------------
Similarly, constrain the radii with several ABC <= 1,
and maximize the max radius with several more.
Radius( A + B X C ) <= 1 may not be feasible with `ascale=1`;
start small, `ascale=0.2` or so.
"""
from __future__ import division
from collections import namedtuple
import numpy as np
from scipy.sparse.linalg import svds # Arpack, fast
__version__ = "2018-11-20 Nov denis-bz-py t-online de"
#...............................................................................
ABCrec = namedtuple( "RandomABC", "A B C n nr nc ascale seed" ) # struct, record
def radius( X, abcrec ):
    return maxsing( AplusBXC( X, abcrec ))

def AplusBXC( X, abcrec ):
    """ -> A + B.X.C """
    A, B, C, n, nr, nc = abcsplit( abcrec )[:6]
    X = X.reshape( nr, nc )  # 1d -> 2d
    return A + B .dot(X) .dot(C)

def maxsing( A ):
    """ max singular value, i.e. the spectral norm |A|_2 (not the eigenvalue spectral radius) """
    v0 = np.ones( A.shape[1] )  # else random
    return svds( A, k=1, tol=1e-6, v0=v0, return_singular_vectors=False )[0]

def radii( X, abclist ):
    return np.array([ radius( X, abc ) for abc in abclist ])

def randomabc( n, nr, nc, ascale=1, seed=123, verbose=1 ):
    """ -> an ABCrec, A B C random-normal i.i.d. for A + B X C """
    # B, C are fancier in the paper, mttiw
    rand = np.random.RandomState( seed=seed )  # don't change any other random
    A = rand.normal( size=(n, n), scale=ascale )  # sd 1: A >> BXC ?
    B = rand.normal( size=(n, nr) )
    C = rand.normal( size=(nc, n) )
    abcrec = ABCrec( A, B, C, n, nr, nc, ascale, seed )
    if verbose:
        print "randomabc:", abcstr( abcrec )
    return abcrec

def randomabcs( nrand, n, nr, nc, ascale=1, seed=123, verbose=1 ):
    """ -> a list of nrand ABCrecs """
    return [randomabc( n, nr, nc, ascale=ascale, seed=seed, verbose=verbose )
            for seed in xrange( seed, seed + nrand )]

def abcsplit( abc ):
    return abc.A, abc.B, abc.C, abc.n, abc.nr, abc.nc, abc.ascale, abc.seed

def abcstr( abcrec ):
    A, B, C, n, nr, nc, ascale, seed = abcsplit( abcrec )
    return "n %d nr %d nc %d ascale %.3g seed %d " % (
            n, nr, nc, ascale, seed )
#...............................................................................
class OptimABXC( object ):
    """ a helper class with func(X) constrf(X) to optimize ABXC problems
        opt = OptimABXC( n, nr, nc, ncons, nfun [, ascale, seed, verbose] )
        init ncons + nfun random abcs, the A s with standard deviation `ascale`
        minimize func(X) = - max radius
        s.t. constrf(X) = [1 - radii] >= 0.
    """

    def __init__( s, n, nr, nc, ncons=1, nfun=1, ascale=1, seed=123, verbose=1 ):
        abcs = randomabcs( ncons + nfun, n, nr, nc, ascale=ascale, seed=seed,
                verbose=verbose )
        s.consabcs = abcs[:ncons]
        s.funcabcs = abcs[ncons:]
        s.x0 = np.zeros( nr * nc )
        s.verbose = verbose
        s._str = abcstr( abcs[0] )
        s.nfunccalls = s.nconscalls = 0

    def func( s, X ):
        """ - max radius: minimize this """
        s.nfunccalls += 1  # finite-diff fprime calls this often
        return - np.amax( s.funcvec( X ))

    def funcvec( s, X ):
        return radii( X, s.funcabcs )  # to monitor the lot

    def constrf( s, X ):
        """ [1 - radii ...]: constrain >= 0 """
        s.nconscalls += 1
        return 1 - radii( X, s.consabcs )

    def __str__( s ):
        return s._str
#...............................................................................
if __name__ == "__main__":  # minimal test
    import sys
    np.set_printoptions( threshold=10, edgeitems=10, linewidth=140,
            formatter = dict( float = lambda x: "%.2g" % x ))  # float arrays %.2g
    thispy = __file__.split("/")[-1]
    print 80 * "="

    #...........................................................................
    n, nr, nc = 6, 2, 3  # A n x n, X nr x nc
    ncons, nfun = 1, 1  # > 1: mttiw
    ascale = .2  # sd 1: A >> BXC ?
    nrand = 10
    tlo, thi = 0, .1  # how flat ?
        # to change these params, run this.py a=1 b=None c='expr' ...
        # in sh or ipython --
    for arg in sys.argv[1:]:
        exec( arg )

    print "%s n=%d nr=%d nc=%d ncons=%d nfun=%d ascale=%g \n" % (
            thispy, n, nr, nc, ncons, nfun, ascale )
    print "How do the radii of %d random-normal %d x %d A with sd %g vary ?" % (
            nrand, n, n, ascale )
    randomA = [abc.A for abc in
            randomabcs( nrand, n, nr, nc, ascale=ascale, seed=3, verbose=0 )]
    print np.sort( map( maxsing, randomA )), "\n"

    #...........................................................................
    opt = OptimABXC( n, nr, nc, ncons, nfun, ascale=ascale, seed=123, verbose=1 )
    print "\nHow flat are constrf and func for X = t * ones(( %d, %d )) ?" % (
            nr, nc )
    print "(we want to maximize the radius, i.e. minimize func, with constrf = 1 - radius >= 0)"
    for t in np.linspace( tlo, thi, 11 ):
        X = t * np.ones(( nr, nc ))
        cons = opt.constrf( X )
        fvec = opt.funcvec( X )
        print "t: %-6.2g constrf: %s\t func: %.3g = max %s " % (
                t, cons, fvec.max(), fvec )
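For completeness, a minimal sketch (mine, not part of the gist) of driving OptimABXC with scipy's trust-constr, the method in the plot above. It assumes the definitions in abxc.py are in scope and scipy >= 1.1; the options are only an example:

    import numpy as np
    from scipy.optimize import NonlinearConstraint, minimize

    opt = OptimABXC( n=6, nr=2, nc=3, ncons=1, nfun=1, ascale=0.2 )
    lims = NonlinearConstraint( opt.constrf, 0, np.inf )  # 1 - radii >= 0
    res = minimize( opt.func, opt.x0, method="trust-constr",
            constraints=lims, options=dict( maxiter=100 ) )
    print( "max radius %.3g  constrf %s  nfunccalls %d" % (
            - res.fun, opt.constrf( res.x ), opt.nfunccalls ))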