@wanirepo
Last active August 29, 2015 14:19
Applying robust PCA on an example dataset (to download this dataset, see https://github.com/wanirepo/SAS2015_PatRec)
cd('/Users/clinpsywoo/github/fastRPCA');
setup_fastRPCA;
% load an example data
load('/Users/clinpsywoo/github/SAS2015_PatRec/data.mat');
X = dat.dat;
SS = svd(X, 'econ'); % singular values only; can decompose X or X', but faster when rows >> cols
% 3 solvers (5 variants):
% 1) constrained
% 2) Lagrangian: two versions (sum of L, S, or max of L, S),
%    two parameters (lambda_L, lambda_S)
% 3) SPGL1: fixed epsilon (an SSE bound), which gets rid of one parameter
% Pick the SSE cutoff for epsilon:
% we can choose it based on how much residual (unexplained) variance we want
cutoff = find(cumsum(SS.^2)./sum(SS.^2) < .95, 1, 'last');
% cutoff: last component at which cumulative explained variance is still below 95%
epsilon = sqrt(sum(SS(cutoff:end).^2));
% in practice, choose a lower epsilon
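The epsilon selection above can be sketched in NumPy (a toy X stands in for the real data, indexing is 0-based, and the 95% threshold follows the comment above):

```python
import numpy as np

# Toy data matrix standing in for X (any tall matrix works).
rng = np.random.default_rng(0)
X = rng.standard_normal((100, 20))

# Singular values only, as in MATLAB's svd(X, 'econ').
ss = np.linalg.svd(X, compute_uv=False)

# Last index at which cumulative explained variance is still below 95%
# (0-based analogue of MATLAB's max(find(...))).
explained = np.cumsum(ss**2) / np.sum(ss**2)
cutoff = np.max(np.nonzero(explained < 0.95)[0])

# Residual energy beyond the cutoff becomes the SSE bound epsilon.
epsilon = np.sqrt(np.sum(ss[cutoff:]**2))
```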
%%
clear opts;
%opts = [];
opts.max = true; % max or sum method;
% the max method is generally preferred because it yields a more
% stable solution, for algorithmic reasons
opts.tau0 = 1193.62;
% opts.tol: convergence tolerance; higher tol is faster
% higher epsilon is also faster
opts.L1L2 = 'cols';
A_cell = []; % this is for partial observations (missing data) or
% partial weights for noisy data/outliers
lambda_S = 50; % constraint parameter:
% larger lambda_S -> more sparsity; smaller lambda_S -> less.
% Range is 0 to Inf; 0 means no sparsity penalty (don't do that).
% Other considerations:
% - try different lambda_S values
% - once you find a good tau value, use it to initialize opts.tau0 on later runs
% - check the sparsity of the output: if it is 0%, S is all zeros,
%   so reduce lambda_S
% - AIC/BIC could be used to optimize sparseness approximately
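The sparsity check described above can be sketched in NumPy (a toy S stands in for the solver's output; the 25.3% figure is just the example value reported in the results below):

```python
import numpy as np

# Toy sparse matrix standing in for the solver's S output (voxels x images).
rng = np.random.default_rng(1)
S = np.zeros((50, 150))
idx = rng.choice(S.size, size=int(0.253 * S.size), replace=False)
S.flat[idx] = rng.standard_normal(idx.size)

# Overall sparsity of the output: fraction of nonzero entries.
sparsity = np.count_nonzero(S) / S.size

# Columns (images) with any nonzero entry -- used later to drop columns.
nonzero_cols = np.any(S != 0, axis=0)

# If sparsity were 0%, S would be all zeros, so lambda_S should be reduced.
```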
% Output:
% L is the 'denoised' data matrix (run SVD on it to get distributed components)
% S is the sparse matrix of what was excluded from L
% complexity/slowness: ~m^2 * v
[L,S,errHist,tau] = solver_RPCA_SPGL1(X,lambda_S, epsilon, A_cell, opts);
% sparsity 25.3% with lambda_S = 200
colormap(gray);
subplot(1,3,1);
imagesc(dat.dat, [-1 1]);
title('original dataset')
subplot(1,3,2);
imagesc(L, [-1 1]);
title('L: denoised data matrix')
subplot(1,3,3);
imagesc(S, [-1 1]/100);
title('S: sparse data matrix')
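For intuition about what the solver returns, here is a minimal, self-contained sketch of robust PCA on toy low-rank + sparse data. Note this is not fastRPCA's SPGL1 solver: it is a basic inexact-ALM principal component pursuit (singular-value thresholding for L, entrywise soft-thresholding for S), shown only to illustrate the X ~ L + S decomposition:

```python
import numpy as np

def soft(A, t):
    """Entrywise soft-thresholding."""
    return np.sign(A) * np.maximum(np.abs(A) - t, 0.0)

def rpca_ialm(X, lam=None, tol=1e-7, max_iter=500):
    """Basic inexact-ALM principal component pursuit: X ~ L + S."""
    m, n = X.shape
    if lam is None:
        lam = 1.0 / np.sqrt(max(m, n))  # standard PCP default
    mu = 1.25 / np.linalg.norm(X, 2)
    mu_max, rho = mu * 1e7, 1.5
    Y = np.zeros_like(X)
    S = np.zeros_like(X)
    normX = np.linalg.norm(X)
    for _ in range(max_iter):
        # Low-rank update: singular-value thresholding.
        U, s, Vt = np.linalg.svd(X - S + Y / mu, full_matrices=False)
        L = (U * np.maximum(s - 1.0 / mu, 0.0)) @ Vt
        # Sparse update: soft-thresholding.
        S = soft(X - L + Y / mu, lam / mu)
        R = X - L - S
        Y = Y + mu * R
        mu = min(mu * rho, mu_max)
        if np.linalg.norm(R) / normX < tol:
            break
    return L, S

# Toy data: rank-2 matrix plus 5% sparse gross corruption.
rng = np.random.default_rng(0)
m, n, r = 60, 60, 2
L0 = rng.standard_normal((m, r)) @ rng.standard_normal((r, n))
S0 = np.zeros((m, n))
mask = rng.random((m, n)) < 0.05
S0[mask] = 10 * rng.standard_normal(mask.sum())
L, S = rpca_ialm(L0 + S0)
```

In this regime (low rank, a small fraction of corrupted entries), L and S recover L0 and S0 closely, which is the same L/S split visualized in the subplots above.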
%% Test prediction -------------------------
wh_folds = reshape(repmat(1:30,5,1), 150, 1);
[~, loso.stats] = predict(dat, 'algorithm_name', ...
'cv_lassopcr', 'nfolds', wh_folds, 'error_type', 'mse');
dat1 = dat;
dat1.dat = L;
[~, loso.stats1] = predict(dat1, 'algorithm_name', ...
'cv_lassopcr', 'nfolds', wh_folds, 'error_type', 'mse');
dat2 = dat;
dat2.dat = S;
[~, loso.stats2] = predict(dat2, 'algorithm_name', ...
'cv_lassopcr', 'nfolds', wh_folds, 'error_type', 'mse');
dat3 = dat;
L(:,sum(S)~=0) = [];
wh_folds(sum(S)~=0) = [];
dat3.dat = L;
dat3.Y(sum(S)~=0) = [];
[~, loso.stats3] = predict(dat3, 'algorithm_name', ...
'cv_lassopcr', 'nfolds', wh_folds, 'error_type', 'mse');
disp(['Prediction-outcome correlation with original data: ' num2str(loso.stats.pred_outcome_r)]);
disp(['Prediction-outcome correlation with L: ' num2str(loso.stats1.pred_outcome_r)]);
disp(['Prediction-outcome correlation with S: ' num2str(loso.stats2.pred_outcome_r)]);
disp(['Prediction-outcome correlation with L (after removing entire columns using S): ' num2str(loso.stats3.pred_outcome_r)]);
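The fold vector built at the start of the prediction section assigns each subject's 5 images to one of 30 leave-one-subject-out folds; a NumPy equivalent of the reshape/repmat idiom (assuming 30 subjects x 5 images, as in the script):

```python
import numpy as np

# Fold label = subject index, repeated 5 times per subject.
# Equivalent to MATLAB's reshape(repmat(1:30, 5, 1), 150, 1).
wh_folds = np.repeat(np.arange(1, 31), 5)
```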

Original data matrix and L/S matrices


25.3% sparsity
38 columns of S are nonzero.

LOSO results

Prediction-outcome correlation with original data: 0.42183
Prediction-outcome correlation with L: 0.11021
Prediction-outcome correlation with S: -0.19335
Prediction-outcome correlation with L (after removing entire columns using S): 0.17709
