lell/sgamfit.m

## sgamfit.m
function [k, theta, t, pval] = sgamfit(xs)
%SGAMFIT fits a shifted GAMMA distribution to a provided set of
%           observations. It uses a smart initialization for the MATLAB
%           constrained optimizer FMINCON to find the ML fit.
%
%   [K, THETA, T, PVAL] = SGAMFIT(XS), returns ML parameters for a
%           shifted GAMMA distribution fit to XS, along with the P-value for a
%           KS-test of the goodness of fit.
%
% Inputs:
%           XS 1xN array of observations.
%
% Outputs:
%           K real scalar for shape of ML fit.
%
%           THETA real scalar for scale of ML fit.
%
%           T real scalar for offset of ML fit. So, if
%                     [K, THETA, T, ~] = SGAMFIT(XS), then the ML fit implies
%                     that XS - T is distributed as a GAMMA random variable with
%                     shape K and scale THETA.
%
%           PVAL real scalar in [0, 1] indicating P-value for a KS-test for the
%                     null hypothesis that XS is drawn from the random variable
%                     indicated above. This test is only performed if a PVAL
%                     output variable is provided (i.e., if the result of
%                     SGAMFIT is assigned to [K, THETA, T, PVAL]).
%
% Example:
%
%           > %%% %%% %%%
%           > XS = gamrnd(11, 22, 1, 1000000) - 44;
%           > [K, THETA, T, PVAL] = SGAMFIT(XS)
%           K =
%
%              10.9818
%
%
%           THETA =
%
%              22.0338
%
%
%           T =
%
%             -43.9410
%
%
%           PVAL =
%
%               0.9701
%
%
%           > %%% %%% %%%
%           > XS = gamrnd(11, 22, 1, 1000000);
%           > [K, THETA] = SGAMFIT(XS)
%
%           K =
%
%              11.3127
%
%
%           THETA =
%
%              21.6647
%
%
%           > %%% %%% %%%
%
% Copyright (c) 2016, Lloyd T. Elliott
%
% All rights reserved.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are met:
%
% 1. Redistributions of source code must retain the above copyright notice, this
% list of conditions and the following disclaimer.
%
% 2. Redistributions in binary form must reproduce the above copyright notice,
% this list of conditions and the following disclaimer in the documentation
% and/or other materials provided with the distribution.
%
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
% IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
% DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
% FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
% DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
% SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
% CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
% OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
% OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


if isempty(xs) || ~isnumeric(xs) || ~isreal(xs) || any(isnan(xs)) || ...
        size(xs, 1) ~= 1 || any(isinf(xs))

    error(['SGAMFIT: XS must be a nonempty 1xN matrix of real' ...
        'non-nan/non-inf values.']);
end
N = length(xs);
sx = sum(xs);

% function to map parameters to log likelihood given by observations.
function y = curry(p)
    k_  = p(1);
    theta_ = p(2);
    t_ = p(3);
    z = xs - t_;
    if any(z <= 0)
        y = Inf;
    else
        lz = sum(log(z));
        sz = sx - N * t_;
        y = N*gammaln(k_) + N*k_*log(theta_) - (k_ - 1)*lz + sz/theta_;
    end
end
left = min(xs);
right = max(xs);
range = right - left;
epsilon = range * 0.05;

% estimate for offset: minimum observed value minus 5% of observed value range.
t_est = left - epsilon;

% estimate for shape: mean of observation shifted by the offset.
k_est = mean(xs) - t_est;

% estimate for scale: 1.0.
theta_est = 1.0;
lb = [0 0 -Inf];
ub = [Inf Inf left];
p0 = [k_est theta_est t_est];
options = optimset('Display', 'notify-detailed');
p = fmincon(@curry, p0, [], [], [], [], lb, ub, [], options);
k = p(1);
theta = p(2);
t = p(3);
dist = makedist('Gamma', 'a', k, 'b', theta);
if nargout == 4
    [~, pval] = kstest(xs - t, 'CDF', dist);
else
    pval = nan;
end
end
	function [k, theta, t, pval] = sgamfit(xs)
	%SGAMFIT fits a shifted GAMMA distribution to a provided set of
	% observations. It uses a smart initialization for the MATLAB
	% constrained optimizer FMINCON to find the ML fit.
	%
	% [K, THETA, T, PVAL] = SGAMFIT(XS), returns ML parameters for a
	% shifted GAMMA distribution fit to XS, along with the P-value for a
	% KS-test of the goodness of fit.
	%
	% Inputs:
	% XS 1xN array of observations.
	%
	% Outputs:
	% K real scalar for shape of ML fit.
	%
	% THETA real scalar for scale of ML fit.
	%
	% T real scalar for offset of ML fit. So, if
	% [K, THETA, T, ~] = SGAMFIT(XS), then the ML fit implies
	% that XS - T is distributed as a GAMMA random variable with
	% shape K and scale THETA.
	%
	% PVAL real scalar in [0, 1] indicating P-value for a KS-test for the
	% null hypothesis that XS is drawn from the random variable
	% indicated above. This test is only performed if a PVAL
	% output variable is provided (i.e., if the result of
	% SGAMFIT is assigned to [K, THETA, T, PVAL]).
	%
	% Example:
	%
	% > %%% %%% %%%
	% > XS = gamrnd(11, 22, 1, 1000000) - 44;
	% > [K, THETA, T, PVAL] = SGAMFIT(XS)
	% K =
	%
	% 10.9818
	%
	%
	% THETA =
	%
	% 22.0338
	%
	%
	% T =
	%
	% -43.9410
	%
	%
	% PVAL =
	%
	% 0.9701
	%
	%
	% > %%% %%% %%%
	% > XS = gamrnd(11, 22, 1, 1000000);
	% > [K, THETA] = SGAMFIT(XS)
	%
	% K =
	%
	% 11.3127
	%
	%
	% THETA =
	%
	% 21.6647
	%
	%
	% > %%% %%% %%%
	%
	% Copyright (c) 2016, Lloyd T. Elliott
	%
	% All rights reserved.
	%
	% Redistribution and use in source and binary forms, with or without
	% modification, are permitted provided that the following conditions are met:
	%
	% 1. Redistributions of source code must retain the above copyright notice, this
	% list of conditions and the following disclaimer.
	%
	% 2. Redistributions in binary form must reproduce the above copyright notice,
	% this list of conditions and the following disclaimer in the documentation
	% and/or other materials provided with the distribution.
	%
	% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	% IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	% DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
	% FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	% DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
	% SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
	% CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
	% OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	% OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


	if isempty(xs) \|\| ~isnumeric(xs) \|\| ~isreal(xs) \|\| any(isnan(xs)) \|\| ...
	size(xs, 1) ~= 1 \|\| any(isinf(xs))

	error(['SGAMFIT: XS must be a nonempty 1xN matrix of real' ...
	'non-nan/non-inf values.']);
	end
	N = length(xs);
	sx = sum(xs);

	% function to map parameters to log likelihood given by observations.
	function y = curry(p)
	k_ = p(1);
	theta_ = p(2);
	t_ = p(3);
	z = xs - t_;
	if any(z <= 0)
	y = Inf;
	else
	lz = sum(log(z));
	sz = sx - N * t_;
	y = Ngammaln(k_) + Nk_log(theta_) - (k_ - 1)lz + sz/theta_;
	end
	end
	left = min(xs);
	right = max(xs);
	range = right - left;
	epsilon = range * 0.05;

	% estimate for offset: minimum observed value minus 5% of observed value range.
	t_est = left - epsilon;

	% estimate for shape: mean of observation shifted by the offset.
	k_est = mean(xs) - t_est;

	% estimate for scale: 1.0.
	theta_est = 1.0;
	lb = [0 0 -Inf];
	ub = [Inf Inf left];
	p0 = [k_est theta_est t_est];
	options = optimset('Display', 'notify-detailed');
	p = fmincon(@curry, p0, [], [], [], [], lb, ub, [], options);
	k = p(1);
	theta = p(2);
	t = p(3);
	dist = makedist('Gamma', 'a', k, 'b', theta);
	if nargout == 4
	[~, pval] = kstest(xs - t, 'CDF', dist);
	else
	pval = nan;
	end
	end