% hw5_newtoncg.m
%
% Math 571 HW 5 Q1.
%
% Script to compute and plot the objective function value, the norm of the
% gradient of f(x), the number of function and gradient evaluations, and the
% runtime of using Newton-CG to minimize
% f(x) = e'*(e + A1*(D1*x + d1).^2 + A2*(D2*x + d2).^2).^(1/2)
% The inner CG solve builds each step from directions that are conjugate to
% one another; the outer iteration is x_(k+1) = x_k + alpha*p_k.
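%
% Minimal sanity sketch (commented out; not part of the assignment): the same
% inner-CG machinery on a 2-D quadratic f(x) = 0.5*x'*Q*x, where the exact
% Newton step from any point x is -x, so pt below should roughly approximate
% -xt (the loose CG tolerance can stop early). The names Q, ftoy, dftoy, xt,
% pt are illustrative only.
% Q = [3 1; 1 2];
% ftoy  = @(x) 0.5*(x'*Q*x);
% dftoy = @(x) Q*x;
% xt = [1; -2];
% pt = newtoncg_direction(xt, dftoy(xt), ftoy, dftoy); % expect pt ~= -xt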
load 'MinSurfProb.mat'
tic;
x_0 = xlr; % starting point
%x_k = x_0;
toliter = 10; % max number of iterations
tolfun = 1e-10; % tolerance for stopping Newton-CG when ||f(x_k+1) - f(x_k)|| < tolfun
tolopt = 1e-10; % tolerance for stopping Newton-CG when ||df(x_k)|| < tolopt
alpha = 1; % initial step size
% store results of intermediate iterations, one row per iteration:
% col 1: ||f(x_current) - f(x_previous)||
% col 2: objective function value
% col 3: norm of the gradient of f(x)
% cols 4-6: cumulative counts of function, gradient, and CG evaluations
% col 7: step size
res = zeros(toliter,7);
% store intermediate points
xpoints = zeros(toliter,size(xlr,1));
% store CG directions
cgp = zeros(toliter, size(xlr,1));
iter = 1;
feval_iter = 1; % cumulative count of f(x) evaluations
dfeval_iter = 1; % cumulative count of df(x) evaluations
cg_iter = 0; % count CG iterations
n = size(A1,1); % 16384
f = @(x) dot(ones(n,1),((ones(n,1) + A1*(((D1*x)+d1).^2) + A2*(((D2*x)+d2).^2)).^0.5)); % function
% Gradient of f. The elementwise form below is algebraically identical to
% ((D1'*diag(D1*x+d1))*A1' + (D2'*diag(D2*x+d2))*A2')*s(x) but avoids
% forming dense n-by-n diag matrices.
s = @(x) 1./((ones(n,1) + A1*(((D1*x)+d1).^2) + A2*(((D2*x)+d2).^2)).^0.5);
df = @(x) D1'*(((D1*x)+d1).*(A1'*s(x))) + D2'*(((D2*x)+d2).*(A2'*s(x))); % gradient of function
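% Optional gradient sanity check (commented out): compare a forward-difference
% directional derivative with the analytic gradient along a random direction.
% The names v, hchk, fdchk, anchk are illustrative only.
% v = randn(n,1); hchk = 1e-6;
% fdchk = (f(x_0 + hchk*v) - f(x_0))/hchk;
% anchk = dot(df(x_0), v);
% fprintf('directional derivative: fd %.6e vs analytic %.6e\n', fdchk, anchk);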
%[fval,dfval] = funwithgrad(x_0); %too slow
fval = f(x_0); % evaluate f(x_current) and df(x_current)
dfval = df(x_0);
fval_prev = fval;
% update result
res(1,1) = norm(fval - fval_prev);
res(1,2) = fval;
res(1,3) = norm(dfval);
res(1,4) = feval_iter;
res(1,5) = dfeval_iter;
res(1,7) = alpha;
xpoints(1,:) = x_0;
iter = iter + 1; % 1st iteration occurred
while iter < toliter
[pk,dfeval_iter_cg,cg_iter_true] = newtoncg_direction(x_0,dfval,f,df);
cg_iter = cg_iter + cg_iter_true;
[alpha,feval_iter_LS] = linesearch(x_0,fval,dfval,1,pk,f); % line search, initial step size = 1, p from CG
x_k = x_0 + alpha*(pk); % x_k = x_0 + alpha*p
fval = f(x_k);% update f(x_current), df(x_current)
dfval = df(x_k);
feval_iter = feval_iter + feval_iter_LS + 1; % update number of f(x) evaluation
dfeval_iter = dfeval_iter + dfeval_iter_cg + 1;
res(iter,1) = norm(fval - fval_prev); % store || f(x_current) - f(x_previous) ||
res(iter,2) = fval; % f(x_current)
res(iter,3) = norm(dfval); % df(x_current)
res(iter,4) = feval_iter;
res(iter,5) = dfeval_iter;
res(iter,6) = cg_iter;
res(iter,7) = alpha;
xpoints(iter,:) = x_k;
cgp(iter,:) = pk;
if mod(iter,1) == 0 % log every iteration; increase the modulus for sparser output
fprintf('iteration %d fval %0.5e norm(fval-fprev) %0.5e norm(dfval) %0.5e alpha %0.5e\n', iter, fval, res(iter,1), res(iter,3), alpha);
save('hw5_newtoncg_intermediates.mat');
end
if res(iter,1) < tolfun
disp('norm(f(x_current)-f(x_previous)) < 1e-10');
break
end
if res(iter,3) < tolopt
disp('norm(df(x_current)) < 1e-10');
break
end
x_0 = x_k; % update x_previous to x_current
fval_prev = fval; % update f(x_previous)
iter = iter + 1;
end
toc;
save('hw5_newtoncg_1000.mat')
%fig = plot(linspace(1,iter,iter),res(1:iter,2:3));
%xlabel('Iteration') % x-axis label
%ylabel('Objective value / gradient norm')
%legend('f(x_k)','||df(x_k)||','Location','northeast')
%th = title('Newton-CG convergence history');
function [Hd,dfeval_iter] = Hd_est(x,dfval,d,h,df)
% Math 571 HW 5
%
% function [Hd,dfeval_iter] = Hd_est(x,dfval,d,h,df)
%
% Helper function for estimating the Hessian-vector product H(x)*d, where d
% is some direction, by forward differencing the gradient:
% H(x_current)*d ~= (df(x + h*d) - df(x))/h
%
% Input
% x - current point
% dfval - df(x_current)
% d - direction to multiply the Hessian by
% h - differencing interval; h = 1e-6 gives
% norm(Hd_est(xlr,dfval,-dfval,1e-6,df)) = 3.9e7, which does not change much with smaller h
% df - function handle for df(x)
% Output
% Hd - finite-difference estimate of Hessian*d at the current point
% dfeval_iter - number of evaluations of df
%
%[fk,dfk] = funwithgrad(x+h*d); % df(x+h*d)
dfk = df(x+h*d);
dfeval_iter = 1;
Hd = (dfk - dfval)/h;
end
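% Example usage (commented out; assumes x_0 and df from the main script are in
% scope; the names g0 and Hg are illustrative only). A central difference
% (df(x+h*d) - df(x-h*d))/(2*h) would reduce the truncation error from O(h) to
% O(h^2) at the cost of one extra gradient call; the forward difference above
% is what this script uses.
% g0 = df(x_0);
% [Hg,~] = Hd_est(x_0, g0, -g0, 1e-6, df); % ~ H(x_0)*(-df(x_0))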
function [pk,dfeval_iter,cg_iter] = newtoncg_direction(x,dfval,f,df)
% Math 571 HW 5 Q2
%
% function [pk,dfeval_iter,cg_iter] = newtoncg_direction(x,dfval,f,df)
%
% Helper function for finding the Newton-CG direction: approximately solve
% H(x)*p = -df(x) by conjugate gradients, estimating Hessian-vector
% products with finite differences (Hd_est).
%
% Input
% x - current point
% dfval - df(x)
% f - function handle for f(x) (unused here; kept for a uniform interface)
% df - function handle for df(x)
%
% Output
% pk - Newton-CG descent direction
% dfeval_iter - number of df evaluations
% cg_iter - number of CG iterations
%
r_0 = dfval;
u_0 = -dfval; % some descent direction, set to -df(x)
h = 1e-6; % differencing for approximate Hessian
tol = min(0.5, sqrt(norm(dfval))); % forcing term: run CG until ||r|| < tol*||df(x)||
pk = zeros(size(x,1),1); % CG solution estimate, started at 0
dfeval_iter = 0;
cg_iter = 0;
while norm(r_0) >= tol*norm(dfval)
[Hdk, dfeval_iter_Hd] = Hd_est(x,dfval,u_0,h,df); % estimate Hessian*d at current x
dfeval_iter = dfeval_iter + dfeval_iter_Hd;
dk_H_dk = u_0'*Hdk; % curvature u'*H(x)*u along the current CG direction
if dk_H_dk <= 0 % negative curvature detected
if cg_iter == 0
pk = -dfval; % fall back to steepest descent on the very first pass
end
break % otherwise return the pk accumulated so far
end
ak = (r_0'*r_0)/dk_H_dk; % CG step length; positive when curvature is positive
pk = pk + (ak*u_0); % update the accumulated search direction
rk = r_0 + (ak*Hdk); % update the residual
bk = (rk'*rk)/(r_0'*r_0); % beta for the next conjugate direction
uk = -rk + (bk*u_0); % next conjugate direction
cg_iter = cg_iter + 1;
r_0 = rk;
u_0 = uk;
end
end
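% Example usage (commented out; assumes x_0, f, df from the main script are in
% scope; the names p0, ndf, ncg are illustrative only):
% [p0, ndf, ncg] = newtoncg_direction(x_0, df(x_0), f, df);
% fprintf('CG took %d iterations and %d extra gradient calls\n', ncg, ndf);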
function [alpha,feval_iter] = linesearch(x,fval,dfval,alpha,p,f)
% Math 571 HW 5
%
% function [alpha,feval_iter] = linesearch(x,fval,dfval,alpha,p,f)
%
% Helper function for backtracking line search: given f(x), df(x), a descent
% direction p, and an initial alpha, find alpha satisfying the Armijo
% condition f(x + alpha*p) <= f(x) + c*alpha*(df(x)'*p). Halve alpha
% whenever the condition is not satisfied. (p = -df(x) gives steepest
% descent.)
%
% Input
% x - current point
% fval - f(x)
% dfval - df(x)
% alpha - initial step size in (0,1]
% p - descent direction
% f - function handle for f(x)
%
% Output
% alpha - final step size
% feval_iter - number of evaluations of f(x + alpha*p)
%
fk = f(x+alpha*p); % f at the initial trial point
feval_iter = 1;
c = 1e-2; % Armijo parameter in (0,1)
while fk > fval + c*alpha*(dfval'*p) % Armijo condition not yet satisfied
alpha = alpha/2; % backtrack: halve the step size
fk = f(x+alpha*p); % evaluate f at the new trial point
feval_iter = feval_iter + 1;
end
end
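% Example usage (commented out; assumes x_0, f, df from the main script are in
% scope; the names pd, a, nfe are illustrative only):
% pd = -df(x_0); % any descent direction works
% [a, nfe] = linesearch(x_0, f(x_0), df(x_0), 1, pd, f);
% fprintf('accepted alpha = %g after %d function evaluations\n', a, nfe);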