jiayuzhou/check_grad.m

## check_grad.m
function check_grad(f, x0, varargin)
% CHECK_GRAD  a simple function that checks the correctness of a gradient.
%
% INPUT
%  f        - a function handle; [fval, grad] = f(x, varargin{:}) returns the
%             scalar function value and the gradient at x. The gradient must
%             have as many elements as x.
%  x0       - the location near which the gradient will be evaluated. It may
%             be a vector or an array of any shape.
%  varargin - optional extra arguments forwarded unchanged to f.
%
% OUTPUT
%  none; one line is printed per step size.
%
% For every step size epsilon in 1e-7, 1e-6, ..., 1e-1 the function prints
% the ratio between the central finite difference
%       (f(x0 + epsilon*delta) - f(x0 - epsilon*delta)) / 2
% and the first-order prediction
%       epsilon * grad(x0)' * delta
% along one random unit direction delta (the value printed after
% 'gradient:'). For a correct gradient, the displayed ratio should be
% near 1.0.
%
% to check why the code works there is a useful link:
%    http://ufldl.stanford.edu/tutorial/supervised/DebuggingGradientChecking/
%
% Jiayu, Dec 2, 2015

% random direction with unit Euclidean length. norm is applied to delta(:)
% so that a matrix-shaped x0 is not normalized by its spectral norm.
delta = rand(size(x0));
delta = delta ./ norm(delta(:));
epsilon = 10.^(-7:-1);

% only the analytic gradient at x0 is needed here.
[~, df0] = feval(f, x0, varargin{:});

% directional derivative predicted by the analytic gradient. Vectorizing
% both operands keeps this a scalar for any shape of x0, and it does not
% depend on the step size, so it is computed once.
dir_deriv = df0(:)' * delta(:);

for i = 1:length(epsilon)
    step    = epsilon(i) * delta;
    f_left  = feval(f, x0 - step, varargin{:});
    f_right = feval(f, x0 + step, varargin{:});

    fd_change   = (f_right - f_left) / 2;     % observed change (central difference)
    pred_change = epsilon(i) * dir_deriv;     % change predicted by the gradient

    % %e is spelled out because the printed values are not integers.
    fprintf('epsilon: %e , gradient: %e \n', epsilon(i), fd_change / pred_change);
end

## check_grad_example_matrix.m
function check_grad_example_matrix(feature_dim, dic_size, sample_size)
% an example of check_grad on the dictionary learning objective:
%          min_{alpha, X} || R - alpha * D * X ||_F^2
%
% INPUT (all optional)
%   feature_dim - number of rows of R and D        (default 50)
%   dic_size    - number of columns of D           (default 20)
%   sample_size - number of columns of R and X     (default 30)
%
% Random R and D are generated, and the gradient returned by dic_obj is
% checked with check_grad at a random point [X(:); alpha].
%
% by Jiayu Zhou. July 9, 2015.

if nargin < 1, feature_dim = 50; end
if nargin < 2, dic_size    = 20; end
if nargin < 3, sample_size = 30; end

Rdata = randn(feature_dim, sample_size);
Dic   = randn(feature_dim, dic_size);
% starting point: dic_size*sample_size entries of X plus the scalar alpha.
% (semicolon added so the whole vector is not echoed to the console)
vect0 = rand(dic_size * sample_size + 1, 1);

% closure on the constant variables.
test_func = @(x) dic_obj(x, Dic, Rdata);

% perform testing.
check_grad(test_func, vect0);


function [f, g] = dic_obj(variable_vect, D, R)
% The function value and gradient of the following objective
%     min_{alpha, X} || R - alpha * D * X ||_F^2
%
% INPUT
%   variable_vect - the search point [X(:); alpha]: the entries of the
%                   dic_size-by-sample_size matrix X stacked column-wise,
%                   followed by the scalar alpha as the last element
%   D             - dictionary matrix; dic_size is taken as size(D, 2)
%   R             - data matrix; sample_size is taken as size(R, 2)
% OUTPUT
%   f - function value at the search point
%   g - the vectorized gradient [grad_X(:); grad_alpha], laid out like
%       variable_vect

% the size of the dictionary and the number of samples
dic_size    = size(D, 2);
sample_size = size(R, 2);

% reshape variables
a = variable_vect(end);
X = reshape(variable_vect(1:end-1), [dic_size, sample_size]);

% residual R - alpha * D * X
RaDX = R - a * D * X;

% compute the objective
f = sum(sum(RaDX.^2));

% compute gradients. D' * RaDX appears in both partial derivatives, so the
% product is formed once and reused.
DtR    = D' * RaDX;
grad_X = - (2 * a) * DtR;
grad_a = - 2 * sum(sum(DtR .* X));   % equals -2 * trace((RaDX' * D) * X)

% the vectorized gradient
g = [grad_X(:); grad_a];

## check_grad_example_vector.m
function check_grad_example_vector(feature_dim, sample_size)
% an example of check_grad on a least-squares objective
%          min_{x} 0.5 * || A * x - y ||_2^2
% (the smooth data-fitting term of the Lasso; no L1 penalty is included)
%
% INPUT (all optional)
%   feature_dim - number of columns of A / length of x           (default 500)
%   sample_size - number of rows of A / length of y              (default 30)
%
% by Jiayu Zhou. Dec 2, 2015.

if nargin < 1, feature_dim = 500; end
% sample_size is the 2nd argument; comparing nargin against 3 would always
% be true and silently discard a value supplied by the caller.
if nargin < 2, sample_size = 30; end

A  = randn(sample_size, feature_dim);
y  = randn(sample_size, 1);
x0 = rand(feature_dim, 1);

% closure on the constant variables.
test_func = @(x) dic_obj(x, A, y);

% perform testing.
check_grad(test_func, x0);


function [f, g] = dic_obj(x, A, y)
% Function value and gradient of the least-squares objective
%     0.5 * || A * x - y ||^2
%
% INPUT
%   x - the search point
%   A - matrix multiplying x
%   y - target that A * x is compared against
% OUTPUT
%   f - function value, half the sum of squared residuals
%   g - gradient with respect to x, A' * (A * x - y)

% residual of the linear model at the search point
residual = A * x - y;

% objective first, then its gradient
f = 0.5 * sum(residual .^ 2);
g = A' * residual;
	function check_grad(f, x0, varargin)
	% CHECK_GRAD  a simple function that checks the correctness of a gradient.
	%
	% INPUT
	%  f        - a function handle; [fval, grad] = f(x, varargin{:}) returns the
	%             scalar function value and the gradient at x. The gradient must
	%             have as many elements as x.
	%  x0       - the location near which the gradient will be evaluated. It may
	%             be a vector or an array of any shape.
	%  varargin - optional extra arguments forwarded unchanged to f.
	%
	% OUTPUT
	%  none; one line is printed per step size.
	%
	% For every step size epsilon in 1e-7, 1e-6, ..., 1e-1 the function prints
	% the ratio between the central finite difference
	%       (f(x0 + epsilon*delta) - f(x0 - epsilon*delta)) / 2
	% and the first-order prediction
	%       epsilon * grad(x0)' * delta
	% along one random unit direction delta (the value printed after
	% 'gradient:'). For a correct gradient, the displayed ratio should be
	% near 1.0.
	%
	% to check why the code works there is a useful link:
	% http://ufldl.stanford.edu/tutorial/supervised/DebuggingGradientChecking/
	%
	% Jiayu, Dec 2, 2015

	% random direction with unit Euclidean length. norm is applied to delta(:)
	% so that a matrix-shaped x0 is not normalized by its spectral norm.
	delta = rand(size(x0));
	delta = delta ./ norm(delta(:));
	epsilon = 10.^(-7:-1);

	% only the analytic gradient at x0 is needed here.
	[~, df0] = feval(f, x0, varargin{:});

	% directional derivative predicted by the analytic gradient. Vectorizing
	% both operands keeps this a scalar for any shape of x0, and it does not
	% depend on the step size, so it is computed once.
	dir_deriv = df0(:)' * delta(:);

	for i = 1:length(epsilon)
	    step    = epsilon(i) * delta;
	    f_left  = feval(f, x0 - step, varargin{:});
	    f_right = feval(f, x0 + step, varargin{:});

	    fd_change   = (f_right - f_left) / 2;     % observed change (central difference)
	    pred_change = epsilon(i) * dir_deriv;     % change predicted by the gradient

	    % %e is spelled out because the printed values are not integers.
	    fprintf('epsilon: %e , gradient: %e \n', epsilon(i), fd_change / pred_change);
	end
	function check_grad_example_matrix(feature_dim, dic_size, sample_size)
	% an example of check_grad on the dictionary learning objective:
	% min_{alpha, X} || R - alpha * D * X ||_F^2
	%
	% INPUT (all optional)
	% feature_dim - number of rows of R and D      (default 50)
	% dic_size    - number of columns of D         (default 20)
	% sample_size - number of columns of R and X   (default 30)
	%
	% Random R and D are generated, and the gradient returned by dic_obj is
	% checked with check_grad at a random point [X(:); alpha].
	%
	% by Jiayu Zhou. July 9, 2015.

	if nargin < 1, feature_dim = 50; end
	if nargin < 2, dic_size = 20; end
	if nargin < 3, sample_size = 30; end

	Rdata = randn(feature_dim, sample_size);
	Dic = randn(feature_dim, dic_size);
	% starting point: dic_size*sample_size entries of X plus the scalar alpha.
	% (semicolon added so the whole vector is not echoed to the console)
	vect0 = rand(dic_size * sample_size + 1, 1);

	% closure on the constant variables.
	test_func = @(x) dic_obj(x, Dic, Rdata);

	% perform testing.
	check_grad(test_func, vect0);


	function [f, g] = dic_obj(variable_vect, D, R)
	% The function value and gradient of the following objective
	% min_{alpha, X} || R - alpha * D * X ||_F^2
	%
	% INPUT
	% variable_vect - the search point [X(:); alpha]: the entries of the
	%                 dic_size-by-sample_size matrix X stacked column-wise,
	%                 followed by the scalar alpha as the last element
	% D             - dictionary matrix; dic_size is taken as size(D, 2)
	% R             - data matrix; sample_size is taken as size(R, 2)
	% OUTPUT
	% f - function value at the search point
	% g - the vectorized gradient [grad_X(:); grad_alpha], laid out like
	%     variable_vect

	% the size of the dictionary and the number of samples
	dic_size = size(D, 2);
	sample_size = size(R, 2);

	% reshape variables
	a = variable_vect(end);
	X = reshape(variable_vect(1:end-1), [dic_size, sample_size]);

	% residual R - alpha * D * X
	RaDX = R - a * D * X;

	% compute the objective
	f = sum(sum(RaDX.^2));

	% compute gradients. D' * RaDX appears in both partial derivatives, so the
	% product is formed once and reused.
	DtR = D' * RaDX;
	grad_X = - (2 * a) * DtR;
	grad_a = - 2 * sum(sum(DtR .* X)); % equals -2 * trace((RaDX' * D) * X)

	% the vectorized gradient
	g = [grad_X(:); grad_a];
	function check_grad_example_vector(feature_dim, sample_size)
	% an example of check_grad on a least-squares objective
	% min_{x} 0.5 * || A * x - y ||_2^2
	% (the smooth data-fitting term of the Lasso; no L1 penalty is included)
	%
	% INPUT (all optional)
	% feature_dim - number of columns of A / length of x   (default 500)
	% sample_size - number of rows of A / length of y      (default 30)
	%
	% by Jiayu Zhou. Dec 2, 2015.

	if nargin < 1, feature_dim = 500; end
	% sample_size is the 2nd argument; comparing nargin against 3 would always
	% be true and silently discard a value supplied by the caller.
	if nargin < 2, sample_size = 30; end

	A = randn(sample_size, feature_dim);
	y = randn(sample_size, 1);
	x0 = rand(feature_dim, 1);

	% closure on the constant variables.
	test_func = @(x) dic_obj(x, A, y);

	% perform testing.
	check_grad(test_func, x0);


	function [f, g] = dic_obj(x, A, y)
	% Function value and gradient of the least-squares objective
	% 0.5 * || A * x - y ||^2
	%
	% INPUT
	% x - the search point
	% A - matrix multiplying x
	% y - target that A * x is compared against
	% OUTPUT
	% f - function value, half the sum of squared residuals
	% g - gradient with respect to x, A' * (A * x - y)

	% residual of the linear model at the search point
	residual = A * x - y;

	% objective first, then its gradient
	f = 0.5 * sum(residual .^ 2);
	g = A' * residual;