| Note'Copyright' | |
| Copyright (C) 2017 Xiao-Yong Jin. All rights reserved. | |
| Permission is hereby granted, free of charge, to any person obtaining a copy | |
| of this software and associated documentation files (the "Software"), to deal | |
| in the Software without restriction, including without limitation the rights | |
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| copies of the Software, and to permit persons to whom the Software is | |
| furnished to do so, subject to the following conditions: | |
| The above copyright notice and this permission notice shall be included in | |
| all copies or substantial portions of the Software. | |
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
| THE SOFTWARE. | |
| [ MIT license: http://www.opensource.org/licenses/mit-license.php ] | |
| ) | |
| NB.Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method. | |
| NB. Public Interface | |
| NB. ================ | |
| coinsert'pLBFGSp' | |
| cocurrent'pLBFGSp' | |
| lbfgs=:2 :'u lbfgs_pLBFGS_ v' | |
| NB. Examples | |
| NB. ======== | |
| cocurrent'pLBFGSx' | |
| coinsert'pLBFGSp' | |
| sampleRB=:3 :0 | |
| NB.Uncoupled Rosenbrock | |
| n=.-:#y | |
| t1=.1-n{.y | |
| t2=.10*(n}.y)-*:n{.y | |
| (t1+&(+/@:*:)t2);(-+:t1+g2*n{.y),g2=.20*t2 | |
| ) | |
| sample=:3 :'sampleRB lbfgs (0[echo@:,.) (y#_1.2),y#1' | |
| NB. Library Inplementation | |
| NB. ====================== | |
| cocurrent'pLBFGS' | |
| README=:0 :0 | |
| Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method. | |
| Ported from github.com:chokkan/liblbfgs.git | |
| commit 57678b188ae34c2fb2ed36baf54f9a58b4260d1c | |
| of date Jan 25, 2016 | |
| No L1 norm, nor back tracking line search. | |
| Usage: | |
| parameter funcDf lbfgs progress vars | |
| Parameter default to defParam. | |
| ) | |
| NB.Set to error string before throw. | |
| LBFGSERR=:'' | |
| NB.Default parameters, see comments in lbfgs. | |
| defParam=:6 1e_5 0 40 1e_20 1e20 1e_4 0.9 1e_16 | |
| lbfgs=:2 :0 | |
| defParam_pLBFGS_ u lbfgs_pLBFGS_ v y | |
| : | |
| try.x u lbfgsMain_pLBFGS_ v y catcht.'LBFGS error: ',LBFGSERR_pLBFGS_ end. | |
| ) | |
| lbfgsMain=:2 :0 | |
| NB. x parameters | |
| NB. 0{x number of corrections to approx inv hessian | |
| NB. 1{x eps, for convergence test, (+/&.:*:g) < eps*1>.+/&.:*:y | |
| NB. 2{x maximum number of iterations, ignored if 0 | |
| NB. 3{x maximum number of trials for the line search | |
| NB. 4{x minimum step of the line search | |
| NB. 5{x maximum step of the line search | |
| NB. 6{x ftol, accuracy of the line search, should be >&0 *. <&0.5 | |
| NB. 7{x gtol, accuracy of the line search, should be >&ftol *. <&1 | |
| NB. it can be small (0.1), if the evaluation of func&deriv is cheap | |
| NB. 8{x xtol, estimate of the machine precision, as the minimum for | |
| NB. the relative width of the interval of uncertainty | |
| NB. u function on an array variables | |
| NB. u y returns the function value and derivatives | |
| NB. v called with progress information | |
| NB. y array of variables | |
| NB.Returns the final function value and the array of variables | |
| : | |
| LBFGSERR_pLBFGS_=:'' | |
| x=.x,defParam_pLBFGS_}.~#x | |
| lmys=.lma=.0#~0{x | |
| lms=.lmy=.0$~(0{x),#y | |
| 'fy g'=.u y | |
| gn=.+/&.:*: g | |
| if.gn < (1{x) * 1>.+/&.:*: y do.fy;y return.end. | |
| s=.%gn NB.step | |
| d=.-g NB.direction | |
| k=.1 | |
| end=.0 | |
| while.do. | |
| yp=.y | |
| gp=.g | |
| 'y fy g d s'=.x u linesearch_pLBFGS_ y;fy;g;d;s | |
| ynorm=.+/&.:*:y | |
| gnorm=.+/&.:*:g | |
| if.v y;g;fy;ynorm;gnorm;s;k do.fy;y return.end. NB.ABORT with nonzero return from v | |
| if.gnorm<:(1{x)*ynorm=.ynorm>.1 do.fy;y return.end. NB.CONVERGENT | |
| if.(>&0 *. <&(>:k)) 2{x do.LBFGSERR_pLBFGS_=:'maximum iteration'throw.end. | |
| lms=.lms end}~y-yp | |
| lmy=.lmy end}~g-gp | |
| lmys=.lmys end}~ys=.lmy+/@:*&(end&{)lms | |
| yy=.+/@:*:end{lmy | |
| NB.Recursive formula to compute dir=-H.g | |
| bound=.k<.0{x | |
| k=.>:k | |
| end=.(0{x)|>:end | |
| d=.-g | |
| for_j.|.end|.i.bound do. | |
| lma=.lma j}~ (j{lmys) %~ d+/@:*j{lms | |
| d=.d - lma*&(j&{)lmy | |
| end. | |
| d=.d*ys%yy | |
| for_j.end|.i.bound do. | |
| beta=.(j{lmys)%~d+/@:*j{lmy | |
| d=.d + (j{lms) * beta-~j{lma | |
| end. | |
| NB.Now the search direction d is ready. We try step of 1 first. | |
| s=.1 | |
| end. | |
| ) | |
| linesearch=:1 :0 | |
| NB.Line search described by More and Thuente, 1994. | |
| NB. x parameters, the same as in 'lbfgs' | |
| NB. u function on an array variables, the same as in 'lbfgs' | |
| NB. y y;f;g;d;st | |
| NB. y current variables | |
| NB. f current function value | |
| NB. g current gradient | |
| NB. d current direction | |
| NB. st current step | |
| NB.Returns y;f;g;d;st | |
| defParam_pLBFGS_ u linesearch_pLBFGS_ y | |
| : | |
| x=.x,defParam_pLBFGS_}.~#x | |
| NB.echo ,.y | |
| 'y f g d st'=.y | |
| yp=.y | |
| fp=.f | |
| count=.uinfo=.0 | |
| if.0<dginit=.g+/@:*d do.LBFGSERR_pLBFGS_=:'increase gradient'throw.end. | |
| brackt=.0 | |
| stage1=.1 | |
| finit=.f | |
| dgtest=.dginit * 6{x | |
| prevwidth=.+:width=.(5{x) - (4{x) | |
| NB. stx, fx, dgx contain step, function, and derivative at the best step | |
| NB. sty, fy, dgy are at the other endpoint of the interval of uncertainty | |
| NB. st, f, dg are at the current step | |
| stx=.sty=.0 | |
| fx=.fy=.finit | |
| dgx=.dgy=.dginit | |
| while.do. | |
| if.brackt do.'stmin stmax'=./:~stx,sty else.stmax=.st+4*st-stmin=.stx end. | |
| if.st<4{x do.st=.4{x end. if.st>5{x do.st=.5{x end. | |
| if.(brackt *. (((stmin&>: +. stmax&<:) st) +. ((3{x) <: >:count) +. uinfo ~: 0)) +. (brackt *. ((stmax-stmin) <: stmax*8{x))do.st=.stx end. | |
| y=.yp+st*d | |
| 'f g'=.u y | |
| dg=.g+/@:*d | |
| ftest1=.finit+st*dgtest | |
| count=.>:count | |
| NB.echo count,brackt,stmin,stmax,st,uinfo | |
| if.brackt *. (((stmin&>: +. stmax&<:) st) +. uinfo ~: 0)do.LBFGSERR_pLBFGS_=:'rounding error'throw.end. | |
| if.(st=5{x) *. (f<:ftest1) *. dg<:dgtest do.LBFGSERR_pLBFGS_=:'maximum step'throw.end. | |
| if.(st=4{x) *. ((f>ftest1) +. dg>:dgtest)do.LBFGSERR_pLBFGS_=:'minimum step'throw.end. | |
| if.brackt *. (stmax-stmin)<:stmax*8{x do.LBFGSERR_pLBFGS_=:'width too small'throw.end. | |
| if.count>:3{x do.LBFGSERR_pLBFGS_=:'maximum line search'throw.end. | |
| if.(f<:ftest1) *. (|dg)<:-dginit*7{x do.y;f;g;d;st return.end. NB.CONVERGENT | |
| if.stage1 *. (f<:ftest1) *. dg>:dginit*(6{x)<.(7{x)do.stage1=.0 end. | |
| if.stage1 *. (ftest1<f) *. f<:fx do. | |
| NB.Use a modified function | |
| fm=.f-st*dgtest | |
| fxm=.fx-stx*dgtest | |
| fym=.fy-sty*dgtest | |
| dgm=.dg-dgtest | |
| dgxm=.dgx-dgtest | |
| dgym=.dgy-dgtest | |
| 'uinfo stx fxm dgxm sty fym dgym st brackt'=.updateTrialInterval_pLBFGS_ stx,fxm,dgxm,sty,fym,dgym,st,fm,dgm,stmin,stmax,brackt | |
| fx=.fxm+stx*dgtest | |
| fy=.fym+sty*dgtest | |
| dgx=.dgxm+dgtest | |
| dgy=.dgym+dgtest | |
| else. | |
| 'uinfo stx fx dgx sty fy dgy st brackt'=.updateTrialInterval_pLBFGS_ stx,fx,dgx,sty,fy,dgy,st,f,dg,stmin,stmax,brackt | |
| end. | |
| if.brackt do. | |
| if.(0.66*prevwidth)<:|sty-stx do.st=.stx+-:sty-stx end. | |
| prevwidth=.width | |
| width=.|sty-stx | |
| end. | |
| end. | |
| LBFGSERR_pLBFGS_=:'logic error'throw. | |
| ) | |
| updateTrialInterval=:3 :0 | |
| NB.Update a safeguarded trial value and interval for line search. | |
| NB.The parameter x is the step with the least function value, t is the current step. | |
| NB.Assumes the derivative at x is in the direction of the step. | |
| NB.If brackt is true, the minimizer has been bracketed in and interval of uncertainty between x and y. | |
| NB. y x,fx,dx,y,fy,dy,t,ft,dt,tmin,tmax,brackt | |
| NB. f and d denotes function value and its derivative | |
| NB.Returns x,fx,dx,y,fy,dy,t,brackt | |
| 'x fx dx y fy dy t ft dt tmin tmax brackt'=.y | |
| NB.echo 'utiI';brackt,x,fx,dx,y,fy,dy,t,ft,dt | |
| if.brackt do. | |
| if.x(t&<:@<. +. t&>:@>.)y do.LBFGSERR=:'out of interval'throw.end. | |
| if.0<:dx*t-x do.LBFGSERR=:'increase gradient'throw.end. | |
| if.tmax<tmin do.LBFGSERR=:'incorrect tmin tmax'throw.end. | |
| end. | |
| dsign=.dt~:&*dx | |
| if.fx<ft do. | |
| bound=.brackt=.1 | |
| newt=.mc=.cubicMinimizer x,fx,dx,t,ft,dt | |
| mq=.quardMinimizer x,fx,dx,t,ft | |
| if.mc>:&(|@(-&x))mq do.newt=.mc+-:mq-mc end. | |
| NB.echo '#1';mc,mq,newt | |
| elseif.dsign do. | |
| brackt=.1 | |
| bound=.0 | |
| newt=.mc=.cubicMinimizer x,fx,dx,t,ft,dt | |
| mq=.quardMinimizer2 x,dx,t,dt | |
| if.mc<:&(|@(-&t))mq do.newt=.mq end. | |
| NB.echo '#2';mc,mq,newt | |
| elseif.dt<&|dx do. | |
| bound=.1 | |
| newt=.mc=.cubicMinimizer2 x,fx,dx,t,ft,dt,tmin,tmax | |
| mq=.quardMinimizer2 x,dx,t,dt | |
| if.brackt ~: mc<&(|@(-&t))mq do.newt=.mq end. | |
| NB.echo '#3';mc,mq,newt | |
| elseif.do. | |
| bound=.0 | |
| if.brackt do.newt=.cubicMinimizer t,ft,dt,y,fy,dy | |
| elseif.x<t do.newt=.tmax | |
| elseif.do.newt=.tmin | |
| end. | |
| NB.echo '#4';newt | |
| end. | |
| if.fx<ft do. | |
| y=.t | |
| fy=.ft | |
| dy=.dt | |
| else. | |
| if.dsign do. | |
| y=.x | |
| fy=.fx | |
| dy=.dx | |
| end. | |
| x=.t | |
| fx=.ft | |
| dx=.dt | |
| end. | |
| if.tmax<newt do.newt=.tmax end. | |
| if.newt<tmin do.newt=.tmin end. | |
| if.brackt*.bound do. | |
| mq=.x+0.66*y-x | |
| if.x<y do.if.mq<newt do.newt=.mq end. | |
| else.if.newt<mq do.newt=.mq end. | |
| end. | |
| end. | |
| NB.echo 'utiO';brackt,x,fx,dx,y,fy,dy,newt,ft,dt | |
| 0,x,fx,dx,y,fy,dy,newt,brackt | |
| ) | |
| cubicMinimizer=:3 :0 | |
| NB. y u,fu,du,v,fv,dv point u and v and their function value and derivatives | |
| NB.Returns minimizer of an interpolated cubic function | |
| 'u fu du v fv dv'=.y | |
| d=.v-u | |
| p=.|theta=.du+dv+d%~3*fu-fv | |
| q=.|du | |
| r=.|dv | |
| s=.p>.q>.r | |
| gamma=.s * %: (*:theta%s) - (du%s)*(dv%s) | |
| if.v<u do.gamma=.-gamma end. | |
| p=.theta+gamma-du | |
| q=.gamma+dv+gamma-du | |
| r=.p%q | |
| u+r*d | |
| ) | |
| cubicMinimizer2=:3 :0 | |
| NB.Same as 'cubicMinimizer' with bounds on x and a different v and u | |
| NB. y u,fu,du,v,fv,dv,xmin,xmax | |
| NB.Returns minimizer of an interpolated cubic function with bounds | |
| 'u fu du v fv dv xmin xmax'=.y | |
| d=.v-u | |
| p=.|theta=.du+dv+d%~3*fu-fv | |
| q=.|du | |
| r=.|dv | |
| s=.p>.q>.r | |
| gamma=.s * %: 0 >. (*:a=.theta%s) - (du%s)*(dv%s) | |
| if.u<v do.gamma=.-gamma end. | |
| p=.theta+gamma-dv | |
| q=.gamma+du+gamma-dv | |
| r=.p%q | |
| if.(r<0)*.gamma~:0 do.z=.v-r*d | |
| elseif.a<0 do.z=.xmax | |
| elseif.do.z=.xmin | |
| end. | |
| z | |
| ) | |
| quardMinimizer=:3 :0 | |
| NB. y u,fu,du,v,fv point u and v and their function value and one derivative | |
| NB.Returns minimizer of an interpolated quadratic function | |
| 'u fu du v fv'=.y | |
| u + -: a * du % du + (fu-fv) % a=.v-u | |
| ) | |
| quardMinimizer2=:3 :0 | |
| NB.Same as 'quardMinimizer' with only derivatives | |
| NB. y u,du,v,dv point u and v and their function derivatives | |
| NB.Returns minimizer of an interpolated quadratic function | |
| 'u du v dv'=.y | |
| v + dv * (u-v) % dv-du | |
| ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment