Skip to content

Instantly share code, notes, and snippets.

@studentbrad
Last active May 20, 2020 01:41
Show Gist options
  • Save studentbrad/a4099d5618f1558e574a45e8e15391b7 to your computer and use it in GitHub Desktop.
Save studentbrad/a4099d5618f1558e574a45e8e15391b7 to your computer and use it in GitHub Desktop.
Various methods for solving unconstrained non-linear optimization problems in Matlab including Newton, TrustRegion and BFGS. An additional test script `experiment.m` is added along with test functions Fletcher, Powell and Rosenbrock.
function [minx, minf, iter, funev] = bfgs(ffunc, gfunc, n, x0, epsfun, epsgrad, epsx, maxiter, maxfun)
%
% *******************
% * MANDATORY INPUT *
% *******************
% ffunc : A string indicating the function evaluation file. For example,
% if the file name is 'fMyFunction.m', then ffunc should be
% 'fMyFunction'.
% gfunc : A string indicating the gradient evaluation file. For example,
% if the file name is 'gadient.m', then gfunc should be
% 'gadient'.
% n : The dimension of the function.
%
% ******************
% * OPTIONAL INPUT *
% ******************
% If user input empty matrix/matrices ([] is an empty matrix) for one or
% more of the following inputs, default values will be taken:
% x0 : The initial point of search. It is a column vector of size n.
% The default value is [1;1;..;1](all the coordinates are 1's).
% epsfun : The function value precision. In the (k+1)th iteration, if
% abs(f_k-f_(k+1))/(1+abs(f_(k+1))) is less than epsfun, which
% means the progress in function value is too small, then the
% program should terminate. f_k and f_(k+1) are the function
% values in the kth and (k+1)th iterations, respectively. The
% default value is 0.0001.
% epsgrad : The gradient precision. If the 2-norm of gradient is less than
% epsgrad, the program should terminate. The default value is
% 0.0001.
% epsx : The argument precision. If the 2-norm of difference between x
% and previous x is less than epsx, which means the change of x
% is too small, then the program should terminate. The default
% value is 0.0001.
% maxiter : Maximum number of iterations. The default value is 100.
% maxfun : Maximum number of function evaluations. A gradient evaluation
% or a Hessian evaluation is also considered as a function
% evaluation. The default value is 100000.
%
% **********
% * OUTPUT *
% minx : The optimal solution of x. It is a column vector of size n.
% minf : The function value of minx.
% iter : The number of iterations.
% funev : The number of function evaluations. A gradient evaluatio is
% considered as a function evaluation.
%
% ***************
% * DESCRIPTION *
% ***************
% This routine uses the BFGS Quasi-Newton method with Wolfe
% line-search rule.
%
% This routine should terminate if any of the following criteria is
% satisfied:
% 1) Maximum number of iterations is exceeded.
% 2) Maximum number of function evaluations is exceeded.
% 3) i. x changes less than epsx; and
% ii. relative change of function value is less than than epsfun; and
% iii. gradient norm is less than epsgrad.
% 4) (Hessian+alpha*I) becomes singular (by checking the determinant).
%
% ************** You can insert code here ******************
%% Check number of inputs %%
if nargin > 9
error('newton:TooManyInputs', ...
'requires at most 7 optional inputs');
elseif nargin < 3
error('newton:TooFewInputs', ...
'requires at least 4 mandatory inputs');
end
%% Defaults %%
if ~exist('x0', 'var') || isempty(x0)
x0 = ones([n, 1]);
end
if ~exist('epsfun', 'var') || isempty(epsfun)
epsfun = 0.0001;
end
if ~exist('epsgrad', 'var') || isempty(epsgrad)
epsgrad = 0.0001;
end
if ~exist('epsx', 'var') || isempty(epsx)
epsx = 0.0001;
end
if ~exist('maxiter', 'var') || isempty(maxiter)
maxiter = 100;
end
if ~exist('maxfun', 'var') || isempty(maxfun)
maxfun = 100000;
end
%% Declarations %%
flag = zeros(1, 6);
iter = 0;
fun = 0;
wolfe_fun = 0;
xk = zeros(n, 1);
xk_p1 = zeros(n, 1);
f_k = 0;
f_k_p1 = 0;
gf_k = zeros(n, 1);
gf_k_p1 = zeros(n, 1);
Bk = zeros(n);
Bk_p1 = eye(n);
sigma_k = zeros(n, 1);
y_k = zeros(n, 1);
delt_Bk = zeros(n);
s_k = zeros(n, 1);
lambda = 0;
fxlambda = 0;
gxlambda = 0;
%% Initial Values %%
xk_p1 = x0;
f_k_p1 = feval(ffunc, xk_p1);
fun = fun + 1;
gf_k_p1 = feval(gfunc, xk_p1);
fun = fun + 1;
Bk_p1 = eye(n);
%% Execution %%
disp('BFGS Method');
disp('-------------------------------------------------------');
disp('1. Initialization');
disp('-------------------------------------------------------');
disp(['x0 (default=[1 ... 1]): ', mat2str(x0', 4)]);
disp(['epsfun (default=0.0001): ', num2str(epsfun)]);
disp(['epsgrad (default=0.0001): ', num2str(epsgrad)]);
disp(['epsx (default=0.0001): ', num2str(epsx)]);
disp(['maxiter (default=100): ', num2str(maxiter)]);
disp(['maxfun (default=100000): ', num2str(maxfun)]);
disp(['Iteration ', num2str(iter), ', f(', mat2str(xk_p1', 4), ') = ', num2str(f_k_p1)]);
disp(' ');
disp('2. Algorithum');
disp('-------------------------------------------------------');
while (flag == zeros(1, 6))
% Increment iter counter
iter = iter + 1;
% x_k -> x_(k + 1)
xk = xk_p1;
% f_k -> f_(k + 1)
f_k = f_k_p1;
% gf_k -> gf_(k + 1)
gf_k = gf_k_p1;
% Bk -> B(k + 1)
Bk = Bk_p1;
% Calculate s_k
s_k = -1 * Bk \ gf_k;
%disp(['s_k = ', mat2str(s_k, 4)]);
% Wolfe Line Search
[lambda, fxlambda, gxlambda, wolfe_fun] = wolfes(ffunc, gfunc, n, xk, s_k);
fun = fun + wolfe_fun;
%disp(['lambda = ', num2str(lambda, 4)]);
%disp(['lambda * s_k = ', mat2str(lambda * s_k, 4)]);
% Update x_(k + 1), f(x_(k + 1)), gf(x_(k + 1))
xk_p1 = xk + lambda * s_k;
f_k_p1 = fxlambda;
gf_k_p1 = gxlambda;
% Find sigma_k
sigma_k = lambda * s_k;
%disp(['sigma_k = ', mat2str(sigma_k, 4)]);
% Find y_k
y_k = gf_k_p1 - gf_k;
%disp(['y_k = ', mat2str(y_k, 4)]);
% Solve for delta_Bk
delt_Bk = (y_k * y_k') / (sigma_k' * y_k) - Bk * (sigma_k * sigma_k') * Bk / (sigma_k' * Bk * sigma_k);
%disp(['delt_Bk = ',mat2str(delt_Bk, 4)]);
% Update B(k + 1)
Bk_p1 = Bk + delt_Bk;
%% Check if conditions are met %%
% Check if maximum number of iterations is exceeded
if (iter > maxiter)
flag(1) = 1;
end
% Check if maximum number of function evaluations is exceeded
if (fun > maxfun)
flag(2) = 1;
end
% Check if function value is sufficient
if (abs(f_k - f_k_p1) / (1 + abs(f_k_p1)) < epsfun) & (xk_p1 ~= xk)
flag(3) = 1;
end
% Check if gf_x is sufficient
if (norm(gf_k) < epsgrad)
flag(4) = 1;
end
% The update of x_k is sufficient
if (abs(xk_p1 - xk) < epsx) & (xk_p1 ~= xk)
flag(5) = 1;
end
% Minimum is found
if (gf_k == zeros(n, 1))
flag(6) = 1;
end
%% Iteration details %%
% Message
disp(['Iteration ', num2str(iter), ', f(', mat2str(xk_p1', 5), ') = ', num2str(f_k_p1), ', Lambda = ', num2str(lambda, 4)]);
end
%% Statistics %%
disp(' ');
disp('3. Statistics');
disp('-------------------------------------------------------');
if (flag(1))
disp('Maximum number of iterations exceeded.');
end
if (flag(2))
disp('Maximum number of function evaluations exceeded.');
end
if (flag(3))
disp('Function value precision reached.');
end
if (flag(4))
disp('Gradient precision reached.');
end
if (flag(5))
disp('Precision of x is reached.');
end
if (flag(6))
disp('The point of minimum was found.');
end
disp(['Total number of iterations: ', num2str(iter)]);
disp(['Total number of function evaluations: ', num2str(fun)]);
disp(['2-norm of gradient: ', num2str(norm(gf_k))]);
disp(['2-norm of x_k - x_(k + 1): ', num2str(norm(xk - xk_p1))]);
disp(['|f_k - f_(k + 1)| / (1 + |f_(k + 1)|): ', num2str(abs(f_k - f_k_p1) / (1 + abs(f_k_p1)))]);
disp(' ');
minx = xk;
minf = f_k;
funev = fun;
return
function [lambda, fxlambda, gxlambda, count] = wolfes(ffunc, gfunc, n, x, s, fx, gx, c1, c2, alpha, beta)
% INPUT: Only the first five parameter is neccesary.
% fx and gx are the function value and gradient at point x. It’s for
% reduction of unnecssary recalculation of these values in applications
% alpha has to be positive and less than 1 if present.
% beta has to be greater than 1 if present.
%
% OUTPUT:
% (1) This line search always stops for differentiable function ffunc. If
% ffunc is continous, lambda is a step length which satisfies Wolfe
% condition; otherwise, this line search may not stop, and even it stops,
% lambda may not satisfy Wolfe condition since it may not exist.
% (2) Whether ffunc is continous or not, when this line search stops,
% fxlambda and gxlambda are always the function value and the gradient of at
% step length lambda, and count is the total number of function evaluations
% including gradient evaluations.
% (3) If s is an ascending direction, lambda returned will be negtive. If s
% is orthogonal to gradient at x, lambda will be 0.
lambda = 0;
count = 0;
if nargin < 11, beta = 1.5; end
if nargin < 10, alpha = 0.5; end
if nargin < 9, c2 = 0.8; end
if nargin < 8, c1 = 0.2; end
if nargin < 7, gx = feval(gfunc, x);
count = count + n;
end
if nargin < 6, fx = feval(ffunc, x);
count = count + 1;
end
fxlambda = fx;
gxlambda = gx;
% Modulate s according to whether s is a descent direction
glambda0 = s' * gx;
ascending = 0;
if glambda0 == 0
return
elseif glambda0 > 0
ascending = 1;
s = -s;
glambda0 = -glambda0;
end
% Determine a step length lambda such that f() goes below line c1
lambda = 1;
fxlambda = feval(ffunc, x+lambda*s);
count = count + 1;
while fxlambda > fx + c1 * lambda * glambda0
lambda = alpha * lambda;
fxlambda = feval(ffunc, x+lambda*s);
count = count + 1;
end
gxlambda = feval(gfunc, x+lambda*s);
count = count + n;
if lambda == 0 || fxlambda == fx, if ascending, lambda = -lambda;
end;
return;
end
glambda = s' * gxlambda;
if glambda >= c2 * glambda0
if ascending, lambda = -lambda; end % Unmodulate s
return
end
% Determine an interval [a,b] where there must be a point satisfies Wolfe condition
if fxlambda >= fx + c2 * lambda * glambda0
a = 0;
b = lambda;
else
a = lambda;
while fxlambda < fx + c2 * lambda * glambda0
lambda = beta * lambda;
fxlambda = feval(ffunc, x+lambda*s);
count = count + 1;
end
if isinf(fxlambda) || isinf(lambda)
gxlambda = feval(gfunc, x+lambda*s);
count = count + n;
if ascending, lambda = -lambda; end % Unmodulate s
return
end
p = a;
q = lambda;
while 1
lambda = (p + q) / 2;
fxlambda = feval(ffunc, x+lambda*s);
count = count + 1;
if lambda == p || lambda == q
% If this happens, function f() must be non-continous and there
% is no point which satisfies Wolfe condition. We choose p as
% lambda and return, which is below line c2.
if lambda == q
lambda = p;
fxlambda = feval(ffunc, x+lambda*s);
count = count + 1;
end
gxlambda = feval(gfunc, x+lambda*s);
count = count + n;
if ascending, lambda = -lambda; end % Unmodulate s
return
end
if fxlambda > fx + c1 * lambda * glambda0
q = lambda;
elseif fxlambda < fx + c2 * lambda * glambda0
p = lambda;
else
break
end
end
b = lambda;
end
% Find a Wolfe point which satisfies Wolfe condition
while 1
lambda = (b + a) / 2;
fxlambda = feval(ffunc, x+lambda*s);
gxlambda = feval(gfunc, x+lambda*s);
count = count + 1 + n;
glambda = s' * gxlambda;
if glambda >= c2 * glambda0
if ascending, lambda = -lambda; end % Unmodulate s
return
end
if lambda == a || lambda == b
% If this happens, function f() must be non-continous and there is
% no point which satisfies Wolfe condition. We choose a as lambda
% and return, which is below line c2.
if lambda == b
lambda = a;
fxlambda = feval(ffunc, x+lambda*s);
gxlambda = feval(gfunc, x+lambda*s);
count = count + 1 + n;
end
if ascending, lambda = -lambda; end % Unmodulate s
return
end
if fxlambda >= fx + c2 * lambda * glambda0
b = lambda;
else
a = lambda;
end
end
return
%% Test #1 %%
% Test Rosenbrock function with epsx of 0.00001
[minx_n, f_n, iter_n, funev_n] = newton('fRosenbrock', 'gRosenbrock', 'hRosenbrock', 2, [2, 5]', [], [], [], 0.00001);
[minx_t, f_t, iter_t, funev_t] = trustregion('fRosenbrock', 'gRosenbrock', 'hRosenbrock', 2, [2, 5]', [], [], [], 0.00001);
[minx_b, f_b, iter_b, funev_b] = bfgs('fRosenbrock', 'gRosenbrock', 2, [2, 5]', [], [], 0.00001);
[minx_f, f_f] = fminsearch('fRosenbrock', [2, 5]');
[minx_u, f_u] = fminunc('fRosenbrock', [2, 5]');
disp('4. Results');
disp('-------------------------------------------------------');
disp(['Newton result: xmin = ', mat2str(minx_n, 4), ' , fmin = ', num2str(f_n, 4), ' , iter = ', num2str(iter_n, 4), ' , funev = ', num2str(funev_n, 4)]);
disp(['TrustRegion result: xmin = ', mat2str(minx_t', 4), ' , fmin = ', num2str(f_t, 4), ' , iter = ', num2str(iter_t, 4), ' , funev = ', num2str(funev_t, 4)]);
disp(['BGFS result: xmin = ', mat2str(minx_b', 4), ' , fmin = ', num2str(f_b, 4), ' , iter = ', num2str(iter_b, 4), ' , funev = ', num2str(funev_b, 4)]);
disp(['fminsearch result: xmin = ', mat2str(minx_f', 4), ' , fmin = ', num2str(f_f, 4)]);
disp(['fminunc result: xmin = ', mat2str(minx_u', 4), ' , fmin = ', num2str(f_u, 4)]);
disp('Actual: xmin = [1 1] , fmin = 0');
%% Test #2 %%
% Test Powell function with epsx of 0.00001
[minx_n, f_n, iter_n, funev_n] = newton('fPowell', 'gPowell', 'hPowell', 4, [2, 5, -4, 3]', [], [], [], 0.00001);
[minx_t, f_t, iter_t, funev_t] = trustregion('fPowell', 'gPowell', 'hPowell', 4, [2, 5, -4, 3]', [], [], [], 0.00001);
[minx_b, f_b, iter_b, funev_b] = bfgs('fPowell', 'gPowell', 4, [2, 5, -4, 3]', [], [], 0.00001);
[minx_f, f_f] = fminsearch('fPowell', [2, 5, -4, 3]');
[minx_u, f_u] = fminunc('fPowell', [2, 5, -4, 3]');
disp('4. Results');
disp('-------------------------------------------------------');
disp(['Newton result: xmin = ', mat2str(minx_n, 4), ' , fmin = ', num2str(f_n, 4), ' , iter = ', num2str(iter_n, 4), ' , funev = ', num2str(funev_n, 4)]);
disp(['TrustRegion result: xmin = ', mat2str(minx_t', 4), ' , fmin = ', num2str(f_t, 4), ' , iter = ', num2str(iter_t, 4), ' , funev = ', num2str(funev_t, 4)]);
disp(['BGFS result: xmin = ', mat2str(minx_b', 4), ' , fmin = ', num2str(f_b, 4), ' , iter = ', num2str(iter_b, 4), ' , funev = ', num2str(funev_b, 4)]);
disp(['fminsearch result: xmin = ', mat2str(minx_f', 4), ' , fmin = ', num2str(f_f, 4)]);
disp(['fminunc result: xmin = ', mat2str(minx_u', 4), ' , fmin = ', num2str(f_u, 4)]);
disp('Actual: xmin = [0 0 0 0] , fmin = 0');
%% Test #3 %%
% Test Fletcher function with epsx of 0.00001
[minx_n, f_n, iter_n, funev_n] = newton('fFletcher', 'gFletcher', 'hFletcher', 3, [-10, 4, 2]', [], [], [], 0.00001);
[minx_t, f_t, iter_t, funev_t] = trustregion('fFletcher', 'gFletcher', 'hFletcher', 3, [-10, 4, 2]', [], [], [], 0.00001);
[minx_b, f_b, iter_b, funev_b] = bfgs('fFletcher', 'gFletcher', 3, [-10, 4, 2]', [], [], 0.00001);
[minx_f, f_f] = fminsearch('fFletcher', [-10, 4, 2]');
[minx_u, f_u] = fminunc('fFletcher', [-10, 4, 2]');
disp('4. Results');
disp('-------------------------------------------------------');
disp(['Newton result: xmin = ', mat2str(minx_n, 4), ' , fmin = ', num2str(f_n, 4), ' , iter = ', num2str(iter_n, 4), ' , funev = ', num2str(funev_n, 4)]);
disp(['TrustRegion result: xmin = ', mat2str(minx_t', 4), ' , fmin = ', num2str(f_t, 4), ' , iter = ', num2str(iter_t, 4), ' , funev = ', num2str(funev_t, 4)]);
disp(['BGFS result: xmin = ', mat2str(minx_b', 4), ' , fmin = ', num2str(f_b, 4), ' , iter = ', num2str(iter_b, 4), ' , funev = ', num2str(funev_b, 4)]);
disp(['fminsearch result: xmin = ', mat2str(minx_f', 4), ' , fmin = ', num2str(f_f, 4)]);
disp(['fminunc result: xmin = ', mat2str(minx_u', 4), ' , fmin = ', num2str(f_u, 4)]);
disp('Actual: xmin = [1 0 0] or [-1 0 0] , fmin = 0');
function f = fFletcher(x)
if (length (x) ~= 3)
error('Error: function expects a three dimensional input\n');
end
f = 100 * (x(3) - 5 / pi * atan(x(2) / x(1)))^2 + (sqrt((x(1))^2 + (x(2))^2) - 1)^2 + (x(3))^2;
return
end
function f = fPowell(x)
if (length (x) ~= 4)
error('Error: function expects a four dimensional input\n');
end
f = (x(1) + 10 * x(2))^2 + 5 * (x(3) - x(4))^2 + (x(2) - 2 * x(3))^4 + 10 * (x(1) - x(4))^4;
return
end
function f = fRosenbrock(x)
if (length (x) ~= 2)
error('Error: function expects a two dimensional input\n');
end
f = 100 * (x(2) - (x(1))^2)^2 + (1 - x(1))^2;
return
end
function g = gFletcher(i)
if (length (i) ~= 3)
error('Error: function expects a three dimensional input\n');
end
syms x y z;
f = 100 * (z - 5 / pi * atan(y / x))^2 + (sqrt((x)^2 + (y)^2) - 1)^2 + (z)^2;
grad = gradient(f, [x, y, z]);
x = i(1);
y = i(2);
z = i(3);
g = double(subs(grad));
return
end
function g = gPowell(i)
if (length (i) ~= 4)
error('Error: function expects a four dimensional input\n');
end
syms x y z w;
f = (x + 10 * y)^2 + 5 * (z - w)^2 + (y - 2 * z)^4 + 10 * (x - w)^4;
grad = gradient(f, [x, y, z, w]);
x = i(1);
y = i(2);
z = i(3);
w = i(4);
g = double(subs(grad));
return
end
function g = gRosenbrock(i)
if (length (i) ~= 2)
error('Error: function expects a two dimensional input\n');
end
syms x y;
f = 100 * (y - (x)^2)^2 + (1 - x)^2;
grad = gradient(f, [x, y]);
x = i(1);
y = i(2);
g = double(subs(grad));
return
end
function h = hFletcher(i)
if (length (i) ~= 3)
error('Error: function expects a three dimensional input\n');
end
syms x y z;
f = 100 * (z - 5 / pi * atan(y / x))^2 + (sqrt((x)^2 + (y)^2) - 1)^2 + (z)^2;
hess = hessian(f, [x, y, z]);
x = i(1);
y = i(2);
z = i(3);
h = double(subs(hess));
return
end
function h = hPowell(i)
if (length (i) ~= 4)
error('Error: function expects a four dimensional input\n');
end
syms x y z w;
f = (x + 10 * y)^2 + 5 * (z - w)^2 + (y - 2 * z)^4 + 10 * (x - w)^4;
hess = hessian(f, [x, y, z, w]);
x = i(1);
y = i(2);
z = i(3);
w = i(4);
h = double(subs(hess));
return
end
function h = hRosenbrock(i)
if (length (i) ~= 2)
error('Error: function expects a two dimensional input\n');
end
syms x y;
f = 100 * (y - (x)^2)^2 + (1 - x)^2;
hess = hessian(f, [x, y]);
x = i(1);
y = i(2);
h = double(subs(hess));
return
end
function [minx, minf, iter, funev] = newton(ffunc, gfunc, hfunc, n, x0, epsfun, epsgrad, epshess, epsx, maxiter, maxfun)
%
% *******************
% * MANDATORY INPUT *
% *******************
% These inputs must be provided by user when using the routine:
% ffunc : A string indicating the function evaluation file. For example,
% if the file name is 'fMyFunction.m', then ffunc should be
% 'fMyFunction'.
% gfunc : A string indicating the gradient evaluation file. For example,
% if the file name is 'gadient.m', then gfunc should be
% 'gadient'.
% hfunc : A string indicating the Hessian evaluation file. For example,
% if the file name is 'hessian.m', then hfunc should be
% 'hessian'.
% n : The dimension of the function.
%
% ******************
% * OPTIONAL INPUT *
% ******************
% If user input empty matrix/matrices ([] is an empty matrix) for one or
% more of the following inputs, default values will be taken:
% x0 : The initial point of search. It is a column vector of size n.
% The default value is [1;1;..;1](all the coordinates are 1's).
% epsfun : The function value precision. In the (k+1)th iteration, if
% abs(f_k-f_(k+1))/(1+abs(f_(k+1))) is less than epsfun, which
% means the progress in function value is too small, then the
% program should terminate. f_k and f_(k+1) are the function
% values in the kth and (k+1)th iterations, respectively. The
% default value is 0.0001.
% epsgrad : The gradient precision. If the 2-norm of gradient is less than
% epsgrad, the program should terminate. The default value is
% 0.0001.
% epshess : The hessian precision. If the magnitude of determinant for
% Hessian is less than epshess, the program should terminate.
% The default value of epshess is 0.0001.
% epsx : The argument precision. If the 2-norm of difference between x
% and previous x is less than epsx, which means the change of x
% is too small, then the program should terminate. The default
% value is 0.0001.
% maxiter : Maximum number of iterations. The default value is 100.
% maxfun : Maximum number of function evaluations. A gradient evaluation
% or a Hessian evaluation is also considered as a function
% evaluation. The default value is 100000.
%
% **********
% * OUTPUT *
% **********
% minx : The optimal solution of x. It is a column vector of size n.
% minf : The function value of minx.
% iter : The number of iterations.
% funev : The number of function evaluations. A gradient evaluation or a
% Hessian evaluation is also considered as a function evaluation.
%
% ***************
% * DESCRIPTION *
% ***************
% This routine should terminate if any of the following criteria is
% satisfied:
% 1) Maximum number of iterations is exceeded.
% 2) Maximum number of function evaluations is exceeded.
% 3) i. x changes less than epsx; and
% ii. relative change of function value is less than than epsfun; and
% iii. gradient norm is less than epsgrad.
% 4) Hessian becomes singular (by checking the determinant).
%
% ***************** You can insert code here ******************
%% Check number of inputs %%
if nargin > 11
error('newton:TooManyInputs', ...
'requires at most 7 optional inputs');
elseif nargin < 4
error('newton:TooFewInputs', ...
'requires at least 4 mandatory inputs');
end
%% Defaults %%
if ~exist('x0', 'var') || isempty(x0)
x0 = ones([n, 1]);
end
if ~exist('epsfun', 'var') || isempty(epsfun)
epsfun = 0.0001;
end
if ~exist('epsgrad', 'var') || isempty(epsgrad)
epsgrad = 0.0001;
end
if ~exist('epshess', 'var') || isempty(epshess)
epshess = 0.0001;
end
if ~exist('epsx', 'var') || isempty(epsx)
epsx = 0.0001;
end
if ~exist('maxiter', 'var') || isempty(maxiter)
maxiter = 100;
end
if ~exist('maxfun', 'var') || isempty(maxfun)
maxfun = 100000;
end
%% Declarations %%
flag = zeros(1, 8);
iter = 0;
fun = 0;
xk = zeros(n, 1);
xk_p1 = x0;
f_k = 0;
f_k_p1 = 0;
gf_k = zeros(n, 1);
hf_k = zeros(n);
%% Execution %%
disp('Newton Method');
disp('-------------------------------------------------------');
disp('1. Initialization');
disp('-------------------------------------------------------');
disp(['x0 (default=[1 ... 1]): ', mat2str(x0', 4)]);
disp(['epsfun (default=0.0001): ', num2str(epsfun)]);
disp(['epsgrad (default=0.0001): ', num2str(epsgrad)]);
disp(['epshess (default=0.0001): ', num2str(epshess)]);
disp(['epsx (default=0.0001): ', num2str(epsx)]);
disp(['maxiter (default=100): ', num2str(maxiter)]);
disp(['maxfun (default=100000): ', num2str(maxfun)]);
f_k_p1 = feval(ffunc, xk_p1);
fun = fun + 1;
disp(['Iteration ', num2str(iter), ', f(', mat2str(xk_p1', 4), ') = ', num2str(f_k_p1)]);
disp(' ');
disp('2. Algorithum');
disp('-------------------------------------------------------');
while (flag == zeros(1, 8))
% Increment iter counter
iter = iter + 1;
% x_k -> x_(k + 1)
xk = xk_p1;
% f_k -> f_(k + 1)
f_k = f_k_p1;
%% Function evals (3 total) %%
%1
gf_k = feval(gfunc, xk);
fun = fun + 1;
%2
hf_k = feval(hfunc, xk);
fun = fun + 1;
% Calculate new x_(k + 1)
xk_p1 = xk - hf_k \ gf_k;
%3
f_k_p1 = feval(ffunc, xk_p1);
fun = fun + 1;
%% Check if conditions are met %%
% Check if maximum number of iterations is exceeded
if (iter > maxiter)
flag(1) = 1;
end
% Check if maximum number of function evaluations is exceeded
if (fun > maxfun)
flag(2) = 1;
end
% Check if function value is sufficient
if (abs(f_k - f_k_p1) / (1 + abs(f_k_p1)) < epsfun) & (xk_p1 ~= xk)
flag(3) = 1;
end
% Check if gf_x is sufficient
if (norm(gf_k) < epsgrad)
flag(4) = 1;
end
%Check if hf_x is sufficient
if (abs(det(hf_k)) < epshess)
flag(5) = 1;
end
% Hessian becomes singular (For Newton only).
if (det(hf_k) == 0)
flag(6) = 1;
end
% The update of x_k is sufficient
if (abs(xk_p1 - xk) < epsx) & (xk_p1 ~= xk)
flag(7) = 1;
end
% Minimum is found
[~, p] = chol(hf_k);
if ((gf_k == zeros(n, 1)) & (p >= 0))
flag(8) = 1;
end
%% Iteration details %%
% Message
disp(['Iteration ', num2str(iter), ', f(', mat2str(xk_p1', 4), ') = ', num2str(f_k_p1)]);
end
%% Statistics %%
disp(' ');
disp('3. Statistics');
disp('-------------------------------------------------------');
if (flag(1))
disp('Maximum number of iterations exceeded.');
end
if (flag(2))
disp('Maximum number of function evaluations exceeded.');
end
if (flag(3))
disp('Function value precision reached.');
end
if (flag(4))
disp('Gradient precision reached.');
end
if (flag(5))
disp('Hessian precision reached.');
end
if (flag(6))
disp('Hessian is sigular.');
end
if (flag(7))
disp('Precision of x is reached.');
end
if (flag(8))
disp('The point of minimum was found.');
end
disp(['Total number of iterations: ', num2str(iter)]);
disp(['Total number of function evaluations: ', num2str(fun)]);
disp(['2-norm of gradient: ', num2str(norm(gf_k))]);
disp(['2-norm of x_k - x_(k + 1): ', num2str(norm(xk - xk_p1))]);
disp(['|f_k - f_(k + 1)| / (1 + |f_(k + 1)|): ', num2str(abs(f_k - f_k_p1) / (1 + abs(f_k_p1)))]);
disp(' ');
minx = xk;
minf = f_k;
funev = fun;
return
function [minx, minf, iter, funev] = trustregion(ffunc, gfunc, hfunc, n, x0, epsfun, epsgrad, epshess, epsx, maxiter, maxfun, gamma1, gamma2, mu, eta, alpha)
%
% *******************
% * MANDATORY INPUT *
% *******************
% These inputs must be provided by user when using the routine:
% ffunc : A string indicating the function evaluation file. For example,
% if the file name is 'fMyFunction.m', then ffunc should be
% 'fMyFunction'.
% gfunc : A string indicating the gradient evaluation file. For example,
% if the file name is 'gadient.m', then gfunc should be
% 'gadient'.
% hfunc : A string indicating the Hessian evaluation file. For example,
% if the file name is 'hessian.m', then hfunc should be
% 'hessian'.
% n : The dimension of the function.
%
% ******************
% * OPTIONAL INPUT *
% ******************
% If user input empty matrix/matrices ([] is an empty matrix) for one or
% more of the following inputs, default values will be taken:
% x0 : The initial point of search. It is a column vector of size n.
% The default value is [1;1;..;1](all the coordinates are 1's).
% epsfun : The function value precision. In the (k+1)th iteration, if
% abs(f_k-f_(k+1))/(1+abs(f_(k+1))) is less than epsfun, which
% means the progress in function value is too small, then the
% program should terminate. f_k and f_(k+1) are the function
% values in the kth and (k+1)th iterations, respectively. The
% default value is 0.0001.
% epsgrad : The gradient precision. If the 2-norm of gradient is less than
% epsgrad, the program should terminate. The default value is
% 0.0001.
% epshess : The hessian precision. If the magnitude of determinant for
% Hessian is less than epshess, the program should terminate.
% The default value of epshess is 0.0001.
% epsx : The argument precision. If the 2-norm of difference between x
% and previous x is less than epsx, which means the change of x
% is too small, then the program should terminate. The default
% value is 0.0001.
% maxiter : Maximum number of iterations. The default value is 100.
% maxfun : Maximum number of function evaluations. A gradient evaluation
% or a Hessian evaluation is also considered as a function
% evaluation. The default value is 100000.
% gamma1 : A parameter to scale alpha down. The default value is 0.5.
% gamma2 : A parameter to scale alpha up. The default value is 2.
% mu : The threshold to scale alpha up. If
%
% Actual function value decrease
% --------------------------------- < mu
% predicted function value decrease
%
% then alpha is scaled up. The default value of mu is 0.25.
% eta : The threshold to scale alpha down. If
%
% Actual function value decrease
% --------------------------------- > eta
% predicted function value decrease
%
% then alpha is scaled down. The default value of mu is 0.75.
% alpha : A parameter to obtain update direction. The default value is
% 1.
%
% **********
% * OUTPUT *
% **********
% minx : The optimal solution of x. It is a column vector of size n.
% minf : The function value of minx.
% iter : The number of iterations.
% funev : The number of function evaluations. A gradient evaluation or a
% Hessian evaluation is also considered as a function evaluation.
%
% ***************
% * DESCRIPTION *
% ***************
% This routine uses the Trust Region method.
% This routine should terminate if any of the following criteria is
% satisfied:
% 1) Maximum number of iterations is exceeded.
% 2) Maximum number of function evaluations is exceeded.
% 3) i. x changes less than epsx; and
% ii. relative change of function value is less than than epsfun; and
% iii. gradient norm is less than epsgrad.
% 4) (Hessian+alpha*I) becomes singular (by checking the determinant).
%
% ************** You can insert code here ******************
%% Check number of inputs %%
if nargin > 16
error('newton:TooManyInputs', ...
'requires at most 12 optional inputs');
elseif nargin < 4
error('newton:TooFewInputs', ...
'requires at least 4 mandatory inputs');
end
%% Defaults %%
if ~exist('x0', 'var') || isempty(x0)
x0 = ones([n, 1]);
end
if ~exist('epsfun', 'var') || isempty(epsfun)
epsfun = 0.0001;
end
if ~exist('epsgrad', 'var') || isempty(epsgrad)
epsgrad = 0.0001;
end
if ~exist('epshess', 'var') || isempty(epshess)
epshess = 0.0001;
end
if ~exist('epsx', 'var') || isempty(epsx)
epsx = 0.0001;
end
if ~exist('maxiter', 'var') || isempty(maxiter)
maxiter = 100;
end
if ~exist('maxfun', 'var') || isempty(maxfun)
maxfun = 100000;
end
if ~exist('gamma1', 'var') || isempty(gamma1)
gamma1 = 0.5;
end
if ~exist('gamma2', 'var') || isempty(gamma2)
gamma2 = 2;
end
if ~exist('mu', 'var') || isempty(mu)
mu = 0.25;
end
if ~exist('eta', 'var') || isempty(eta)
eta = 0.75;
end
if ~exist('alpha', 'var') || isempty(alpha)
alpha = 1;
end
%% Declarations %%
flag = zeros(1, 8); % Idicators for termination cause
iter = 0; % Iteration number
fun = 0; % Number of function evals
xk = 0; % x_k
xk_p1 = x0; % x_(k + 1)
f_k = 0; % f(x_k)
f_k_p1 = 0; % f(x_(k + 1))
gf_k = zeros(1, n); % gradient(f(x_k))
hf_k = zeros(n); % hessian(f(x_k))
s_k = 0; % s_k
f_xkpsk = 0; % f(x_k + s_k)
q_xkpsk = 0; % q(x_k + s_k)
p_k = 0; % p_k
%% Execution %%
disp('Trust Region Method');
disp('-------------------------------------------------------');
disp('1. Initialization');
disp('-------------------------------------------------------');
disp(['x0 (default=[1 ... 1]): ', mat2str(x0', 4)]);
disp(['epsfun (default=0.0001): ', num2str(epsfun)]);
disp(['epsgrad (default=0.0001): ', num2str(epsgrad)]);
disp(['epshess (default=0.0001): ', num2str(epshess)]);
disp(['epsx (default=0.0001): ', num2str(epsx)]);
disp(['maxiter (default=100): ', num2str(maxiter)]);
disp(['maxfun (default=100000): ', num2str(maxfun)]);
disp(['gamma1 (default=0.5): ', num2str(gamma1)]);
disp(['gamma2 (default=2): ', num2str(gamma2)]);
disp(['mu (default=0.25): ', num2str(mu)]);
disp(['eta (default=0.75): ', num2str(eta)]);
disp(['alpha (default=1): ', num2str(alpha)]);
f_k_p1 = feval(ffunc, xk_p1);
fun = fun + 1;
disp(['Iteration ', num2str(iter), ', f(', mat2str(xk_p1', 5), ') = ', num2str(f_k_p1), ', alpha = ', num2str(alpha, 6)]);
disp(' ');
disp('2. Algorithum');
disp('-------------------------------------------------------');
while (flag == zeros(1, 8))
% Increment iter counter
iter = iter + 1;
% x_k -> x_(k+1)
xk = xk_p1;
% f_k -> f_(k+1)
f_k = f_k_p1;
%% Function evals (4 total) %%
%1
gf_k = feval(gfunc, xk);
fun = fun + 1;
%2
hf_k = feval(hfunc, xk);
fun = fun + 1;
% Calculate s_k
s_k = -inv(hf_k+alpha*eye(n)) * gf_k;
% Calculate f_(x_k + s_k(alpha))
%3
f_xkpsk = feval(ffunc, xk+s_k);
fun = fun + 1;
% Calculate q_(x_k + s_k(alpha))
q_xkpsk = f_k + gf_k' * s_k + 0.5 * s_k' * hf_k * s_k;
% Calculate p_k
p_k = (f_k - f_xkpsk) / (f_k - q_xkpsk);
% Calculate new value for x_(k+1), alpha
if p_k < mu
xk_p1 = xk;
alpha = gamma2 * alpha;
end
if (p_k >= mu) && (p_k <= eta)
xk_p1 = xk + s_k;
end
if p_k > eta
xk_p1 = xk + s_k;
alpha = gamma1 * alpha;
end
%4
f_k_p1 = feval(ffunc, xk_p1);
%% Check if conditions are met %%
% Check if maximum number of iterations is exceeded
if (iter > maxiter)
flag(1) = 1;
end
% Check if maximum number of function evaluations is exceeded
if (fun > maxfun)
flag(2) = 1;
end
% Check if function value is sufficient
if (abs(f_k - f_k_p1) / (1 + abs(f_k_p1)) < epsfun) & (xk_p1 ~= xk)
flag(3) = 1;
end
% Check if gf_x is sufficient
if (norm(gf_k) < epsgrad)
flag(4) = 1;
end
% Check if hf_x is sufficient
if (abs(det(hf_k)) < epshess)
flag(5) = 1;
end
% Hessian becomes singular (For Newton only)
if (det(hf_k) == 0)
flag(6) = 1;
end
% The update of x_k is sufficient
if (norm(xk_p1 - xk) < epsx) & (xk_p1 ~= xk)
flag(7) = 1;
end
% Minimum is found
[~, p] = chol(hf_k);
if ((gf_k == zeros(n, 1)) & (p >= 0))
flag(8) = 1;
end
%% Iteration details %%
% Message
disp(['Iteration ', num2str(iter), ', f(', mat2str(xk_p1', 5), ') = ', num2str(f_k_p1), ', alpha = ', num2str(alpha, 6)]);
end
%% Statistics %%
disp(' ');
disp('3. Statistics');
disp('-------------------------------------------------------');
if (flag(1))
disp('Maximum number of iterations exceeded.');
end
if (flag(2))
disp('Maximum number of function evaluations exceeded.');
end
if (flag(3))
disp('Function value precision reached.');
end
if (flag(4))
disp('Gradient precision reached.');
end
if (flag(5))
disp('Hessian precision reached.');
end
if (flag(6))
disp('Hessian is sigular.');
end
if (flag(7))
disp('Precision of x is reached.');
end
if (flag(8))
disp('The point of minimum was found.');
end
disp(['Total number of iterations: ', num2str(iter)]);
disp(['Total number of function evaluations: ', num2str(fun)]);
disp(['2-norm of gradient: ', num2str(norm(gf_k))]);
disp(['2-norm of x_k - x_(k + 1): ', num2str(norm(xk - xk_p1))]);
disp(['|f_k - f_(k + 1)| / (1 + |f_(k + 1)|): ', num2str(abs(f_k - f_k_p1) / (1 + abs(f_k_p1)))]);
disp(' ');
minx = xk;
minf = f_k;
funev = fun;
return
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment