Skip to content

Instantly share code, notes, and snippets.

@oseiskar
Last active November 15, 2015 09:53
Show Gist options
  • Save oseiskar/9408fc602705da19ae87 to your computer and use it in GitHub Desktop.
Save oseiskar/9408fc602705da19ae87 to your computer and use it in GitHub Desktop.
Parametric bootstrap experiment
%% GNU Octave
% The model
function y = model(x, y0, theta, sigma)
noise = randn(size(x))*sigma;
y = y0 * exp(-theta*x) + noise;
endfunction
% Least squares-type fit to the model
function [y0, theta, sigma] = fit_model(x, y)
y = abs(y);
n = numel(x);
m_theta_and_lny0 = ([x(:),ones(n,1)] \ log(y(:)));
theta = -m_theta_and_lny0(1);
y0 = exp(m_theta_and_lny0(2));
yrec = model(x, y0, theta, 0);
sigma = sqrt(sum((yrec - y).^2)/(n-1));
endfunction
% Example: two different populations, A and B
fprintf('--- Actual parameters \n');
x = linspace(0,5, 50);
% note: swapping A and B seem to significantly worsen the results
y0_a = 0.8
y0_b = 1.7
theta_a = 0.8
theta_b = 1.3
sigma_a = 0.1
sigma_b = 0.15
y_a = model(x, y0_a, theta_a, sigma_a);
y_b = model(x, y0_b, theta_b, sigma_b);
fprintf('--- Estimated parameters \n');
[y0_a_est, theta_a_est, sigma_a_est] = fit_model(x, y_a)
[y0_b_est, theta_b_est, sigma_b_est] = fit_model(x, y_b)
fprintf('--- Parametric bootstrap test \n');
% Define test statistic z (Euclidean distance of vectors)
function z = test_statistic(pop_a, pop_b)
z = sqrt(sum((pop_a - pop_b).^2, 2));
endfunction
function z = bootstrap(x, y0, theta, sigma, n_samples)
x_mat = repmat(x, n_samples, 1);
pop_a = model(x_mat, y0, theta, sigma);
pop_b = model(x_mat, y0, theta, sigma);
z = test_statistic(pop_a, pop_b);
endfunction
% Parametric bootstrap: compute distribution of z under the null hypothesis:
% population B has the same distribution, i.e., model parameters as A
% (same as the estimated parameters)
n_samples = 100000
diffs = bootstrap(x, y0_a_est, theta_a_est, sigma_a_est, n_samples);
figure(1);
hist(diffs, bins=50);
title('Distribution of z');
% Find threshold of z for the given significance level
significance_level = 0.05
threshold = quantile(diffs, 1-significance_level)
% Compute the actual test statistic for A and B
z_value = test_statistic(y_a, y_b)
null_hypothesis_rejected = z_value > threshold
figure(3);
plot(x,y_a, 'ro');
hold on;
plot(x,y_b, 'bo');
plot(x, model(x, y0_a_est, theta_a_est, 0), 'r--');
plot(x, model(x, y0_b_est, theta_b_est, 0), 'b--');
plot(x, model(x, y0_a, theta_a, 0), 'r-');
plot(x, model(x, y0_b, theta_b, 0), 'b-');
hold off;
legend('sample from A', 'sample from B',
'estimated A', 'estimated B',
'actual A', 'actual B');
input('- press enter -');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment