@zthomas
Created April 13, 2014 18:51
function [J, grad] = lrCostFunction(theta, X, y, lambda)
%LRCOSTFUNCTION Compute cost and gradient for logistic regression with
%regularization
% J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using
% theta as the parameter for regularized logistic regression and the
% gradient of the cost w.r.t. the parameters.
% Initialize some useful values
m = length(y); % number of training examples
% You need to return the following variables correctly
J = 0;
grad = zeros(size(theta));
% ====================== YOUR CODE HERE ======================
% Instructions: Compute the cost of a particular choice of theta.
% You should set J to the cost.
% Compute the partial derivatives and set grad to the partial
% derivatives of the cost w.r.t. each parameter in theta
%
% Hint: The computation of the cost function and gradients can be
% efficiently vectorized. For example, consider the computation
%
% sigmoid(X * theta)
%
% Each row of the resulting matrix will contain the value of the
% prediction for that example. You can make use of this to vectorize
% the cost function and gradient computations.
%
% Hint: When computing the gradient of the regularized cost function,
% there're many possible vectorized solutions, but one solution
% looks like:
% grad = (unregularized gradient for logistic regression)
% temp = theta;
% temp(1) = 0; % because we don't add anything for j = 0
% grad = grad + YOUR_CODE_HERE (using the temp variable)
%
% theta is a vector of size n
% X is m x n (m training examples and n features, including the bias column)
H = sigmoid(X*theta); % H is a vector of length m
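% Note: sigmoid is not defined in this gist; in the course assignment it is
% provided as a separate sigmoid.m on the path. A minimal sketch (an assumption,
% not necessarily the course's exact file) would be:
%   function g = sigmoid(z)
%     g = 1 ./ (1 + exp(-z));   % element-wise logistic function
%   end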
% ensure that we don't regularize theta(1), the bias/intercept term
theta(1) = 0;
% doing some algebra to simplify the summation: let C(i) = log(H(i)) and D(i) = log(1-H(i))
% step(i) = -y(i)*C(i) - (1-y(i))*D(i) = y(i)*D(i) - y(i)*C(i) - D(i)
% since summation is linear, we can compute the three sums separately with matrix products
J = ( transpose(y)*log(1-H) - transpose(y)*log(H) - sum(log(1-H)) )/m;
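% (algebraically this equals the usual unregularized cost (1/m) * ( -y'*log(H) - (1-y)'*log(1-H) ))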
% adding the regularization factor
J = J + lambda * sum( theta .^ 2 ) / (2*m);
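% (theta(1) was zeroed above, so the bias term contributes nothing to sum(theta .^ 2))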
% vectorized gradient calculation
grad = 1/m * transpose(X) * (H-y);
% adding the regularization factor
grad = grad + lambda * theta / m;
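% (again theta(1) = 0, so grad(1) keeps its unregularized value; the bias is not penalized)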
% =============================================================
grad = grad(:); % return the gradient as a column vector regardless of theta's orientation
end
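% Usage sketch (assumption: called from the Octave/MATLAB prompt with this file and a
% sigmoid.m on the path, as in the original assignment; the toy values below are only
% illustrative, not taken from the source):
%   theta_t  = [-2; -1; 1; 2];
%   X_t      = [ones(5, 1) reshape(1:15, 5, 3) / 10];   % 5 examples: bias column + 3 features
%   y_t      = [1; 0; 1; 0; 1];
%   lambda_t = 3;
%   [J_t, grad_t] = lrCostFunction(theta_t, X_t, y_t, lambda_t);
%   % J_t is a scalar cost; grad_t is a 4x1 gradient vector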