ibogun/tutorial.m

## tutorial.m
%*************************************************************
% Copyright (C) Wei Zhong.
% All rights reserved.
% Date: 05/2012
% Tutorial on "Robust Object Tracking via Sparsity-based Collaborative Model"
% _Ivan Bogun April 9, 2014_
% Adapted from the demo.m by Wei Zhong

%% References and links to source codes etc.
% Main reference:
%
% [1] Zhong, Wei, Huchuan Lu, and Ming-Hsuan Yang.
% "Robust object tracking via sparsity-based collaborative model."
% Computer Vision and Pattern Recognition (CVPR), 2012 IEEE Conference on. IEEE, 2012.
%
% Original project page:
% <http://ice.dlut.edu.cn/lu/Project/cvpr12_scm/cvpr12_scm.htm>
%
% Original source code:
% <http://ice.dlut.edu.cn/lu/Project/cvpr12_scm/cvpr12_scm.files/cvpr12_wei_code.zip>
%
% File 'tight_subplot':
% <http://www.mathworks.com/matlabcentral/fileexchange/27991-tight-subplot>
%
% File used to generate this tutorial:
%


%% Initialization of the parameters
% Clear the command window, put the affine-sampling helpers on the path and
% build the initial affine state of the target from the values provided by
% trackparam.
clc;
%clear all;
addpath('./Affine Sample Functions');
% trackparam is run as a script. It is expected to define p, opt.numsample,
% dataPath, forMat and title: all of them are used below but never assigned
% in this script.
trackparam;              % initial position and affine parameters
opt.tmplsize = [32 32];  % template size
sz = opt.tmplsize;
n_sample = opt.numsample;   % number of candidates scored per frame (bounds the per-sample loop)

% Six-element initial state built from p and the template width sz(2).
% NOTE(review): presumably p = [x, y, width, height, angle], which would
% make this [x, y, scale, angle, aspect ratio, skew] -- confirm in trackparam.
param0 = [p(1), p(2), p(3)/sz(2), p(5), p(4)/p(3), 0]; %
p0 = p(4)/p(3);   % same ratio as param0(5); passed to affineTrainG and updateDic

% sample parameters
param0 = affparam2mat(param0);   % converted form is what affineTrainL receives later
param = [];
param.est = param0';             % current state estimate, stored transposed

%% Templates generation
% Get templates from the image based on sampled transformations. Positive
% templates are the ones sampled close to the original bounding box, while
% negative templates are sampled within a specified distance of it so that
% their overlap with the target is not significant.

% obtain positive and negative templates for the SDC
num_p = 50;             %number of positive
num_n = 200;            %number of negative

% A_poso / A_nego are indexed below with one template per column.
[A_poso A_nego] = affineTrainG(dataPath, sz, opt, param, num_p, num_n, forMat, p0);

%% Plot positive and negative templates
% Draw every positive and every negative template in its own tile of a
% tight_subplot grid, one figure per template set.
imRows=10;          % tiles per grid row (handed to tight_subplot as its column count)
isPublishMode=1;    % set to 0 to skip all template/patch figures
if (isPublishMode)
    fprintf('Left figure: sampled positive templates \n');
    figure;
    % ceil keeps the row count an integer when the number of templates is
    % not a multiple of imRows.
    ha = tight_subplot(ceil(size(A_poso,2)/imRows), imRows,[.01 .01],[.01 .01],[.01 .01]);
    for i=1:size(A_poso,2)
        axes(ha(i));
        % each column is one vectorised template; reshape with the template
        % size instead of a hard-coded 32x32 (assumes sz(1)*sz(2) rows)
        imagesc(reshape(A_poso(:,i),sz(1),sz(2)));
        axis off;
    end
    colormap gray;
    snapnow;

    imRows=imRows*2;    % the negative set is shown with twice as many tiles per row
    fprintf('Right figure: sampled negative templates \n');
    figure;
    ha = tight_subplot(ceil(size(A_nego,2)/imRows), imRows,[.01 .01],[.01 .01],[.01 .01]);
    for i=1:size(A_nego,2)
        axes(ha(i));
        imagesc(reshape(A_nego(:,i),sz(1),sz(2)));
        axis off;
    end
    colormap gray;
    snapnow;
end
%% Calculate Dictionary
% Using a sliding window, the template is divided into overlapping patches.
% These patches are clustered using k-means to create a bag-of-words
% representation of the tracked object.
% Working copies of the templates: A_neg is replaced later by updateDic,
% while A_poso / A_nego are not reassigned in this script.
A_pos = A_poso;
A_neg = A_nego;

% obtain the dictionary for the SGM
patchsize = [6 6];
% number of window positions per dimension: from patchsize/2 up to
% sz - patchsize/2 in steps of 2
patchnum(1) = length(patchsize(1)/2 : 2: (sz(1)-patchsize(1)/2));
patchnum(2) = length(patchsize(2)/2 : 2: (sz(2)-patchsize(2)/2));

% size of the dictionary (e.g. k in k-means)
Fisize = 50;
% Fio: dictionary, patcho: template patches; both are plotted below with
% one patchsize(1)-by-patchsize(2) patch per column.
[Fio patcho] = affineTrainL(dataPath, param0, opt, patchsize, patchnum, Fisize, forMat);
Fi = Fio;

% Visualise the template patches and the learned dictionary (only when the
% figures are requested via isPublishMode).
if (isPublishMode)
    fprintf('Left figure: All plot patches recovered using sliding window \n');
    imRows=patchnum(1);   % used as the number of tiles per grid row
    figure;
    % grid with patchnum(2) rows and patchnum(1) columns, i.e. prod(patchnum)
    % tiles -- assumes size(patcho,2) does not exceed that; TODO confirm
    ha = tight_subplot(patchnum(2), imRows,[.01 .01],[.01 .01],[.01 .01]);
    for i=1:size(patcho,2)
        axes(ha(i));
        %subplot(size(A_nego_plot,2)/imRows, imRows,i);
        imagesc(reshape(patcho(:,i),patchsize(1),patchsize(2)));
        axis off;
    end
    colormap gray;

    imRows=5;             % five dictionary atoms per grid row
    fprintf('Right figure:Patches codebook \n');
    figure;
    ha = tight_subplot(Fisize/imRows, imRows,[.01 .01],[.01 .01],[.01 .01]);
    for i=1:size(Fio,2)
        axes(ha(i));
        %subplot(size(A_nego_plot,2)/imRows, imRows,i);
        imagesc(reshape(Fio(:,i),patchsize(1),patchsize(2)));
        axis off;
        axis equal;
    end
    colormap gray;

end
% Sequence information: the third entry of datainfo.txt is used as the
% number of frames.
temp = importdata([dataPath 'datainfo.txt']);
num = temp(3);
% Options shared by the sparse-coding calls; L and lambda are filled in
% right before each call.
% NOTE(review): lambda2 and mode look like SPAMS mexLasso options -- confirm.
paramSR.lambda2 = 0;
paramSR.mode = 2;
% alpha_p is written once per candidate sample (alpha_p(:,:,i), i = 1:n_sample)
% inside the tracking loop, so its third dimension is the sample count, not
% the frame count.
alpha_p = zeros(Fisize, prod(patchnum), n_sample);
result = zeros(num, 6);   % one row of affine parameters per frame of the sequence

%% Tracking loop

fprintf('Tracking display \n');
% The tutorial tracks only the first 11 frames, but never more frames than
% the sequence provides. Rows of result beyond the tracked frames stay zero.
num = min(num, 11);
% Main tracking loop: for every frame, sample candidates, score them with
% the discriminative (SDC) and generative (SGM) models, keep the best
% candidate as the new state and periodically refresh the templates.
for f = 1:num

    % read the image and convert it to grayscale
    img_color = imread([dataPath int2str(f) forMat]);   % frame file name is [dataPath f forMat]
    if size(img_color,3)==3
        img	= rgb2gray(img_color);
    else
        img	= img_color;
    end

    %% Sparsity-based Discriminative Classifier (SDC)
    gamma = 0.4;    % scale of the exponent in the SDC confidence below

    % Particle filtering sampling of the affine transformations
    [wimgs Y param] = affineSample(double(img), sz, opt, param);

    % normalization
    YY = normVector(Y);
    AA_pos = normVector(A_pos);
    AA_neg = normVector(A_neg);


    %% Feature selection step
    % This step reduces the redundant set of features sampled in the
    % previous step. Discriminative features are selected by solving the
    % LASSO problem: $$\min_s || A s-p||_2^2+\lambda||s||_1$$
    P = selectFeature(AA_pos, AA_neg, paramSR);

    % project the original feature space to the selected feature space
    YYY = P'*YY;
    AAA_pos = P'*AA_pos;
    AAA_neg = P'*AA_neg;

    paramSR.L = length(YYY(:,1));   % number of rows (selected features)
    paramSR.lambda = 0.01;

    %% Representation in the new feature space
    % The samples generated in the previous step are coded over the
    % projected positive and negative templates. The coding is done via
    % LASSO.
    beta = mexLasso(YYY, [AAA_pos AAA_neg], paramSR);
    beta = full(beta);

    %% SDC confidence measure
    % Calculate the reconstruction error of each projected candidate using
    % only the positive templates and only the negative templates. Set
    % $\epsilon_f(\beta)=||x'-A_{+}' \beta_{+}||_2^2$ as the foreground
    % (positive) error and $\epsilon_b(\beta)=||x'-A_{-}' \beta_{-}||_2^2$
    % as the background (negative) error. The combined score is
    % $H_c=\exp(-(\epsilon_f-\epsilon_b)/\sigma)$, with $\sigma$ = gamma in
    % the code. This gives high scores to candidates with a LOW positive
    % reconstruction error and a HIGH negative one.
    rec_f = sum((YYY - AAA_pos*beta(1:size(AAA_pos,2),:)).^2);
    rec_b = sum((YYY - AAA_neg*beta(size(AAA_pos,2)+1:end,:)).^2);
    con = exp(-rec_f/gamma)./exp(-rec_b/gamma);

    %% Sparsity-based Generative Model (SGM)
    yita = 0.01;    % not referenced again in this script

    % obtain M patches for each candidate
    patch = affinePatch(wimgs, patchsize, patchnum);

    % normalization
    Fii = normVector(Fi);

    % the template histogram in the first frame and before occlusion handling
    if f==1
        xo = normVector(patcho);
        paramSR.L = length(xo(:,1));
        paramSR.lambda = 0.01;
        alpha_q = mexLasso(xo, Fii, paramSR);
        alpha_q = full(alpha_q);
        alpha_qq = alpha_q;     % alpha_qq is the copy that updateDic replaces later
    end

    temp_q = ones(Fisize, prod(patchnum));  % all-ones mask for the template histogram
    sim = zeros(1,n_sample);                % SGM similarity per candidate
    b = zeros(1,n_sample);                  % occlusion term per candidate

    % the sparse coefficient vectors for M patches

    %% Histogram generation
    % For each sample a set of patches was already created. Each set of
    % patches is sparsely coded over the predefined dictionary, $D$. Once
    % the representation is found, the reconstruction error of every patch
    % is calculated. A high error indicates that the patch is likely to be
    % occluded; occlusion is handled by thresholding the reconstruction
    % error. The similarity between a candidate and the template is
    % calculated using the histogram intersection kernel: $L_c=\sum_j
    % \min(\phi_c^j,\phi^j)$.

    for i = 1:n_sample
        x = normVector(patch(:,:,i));
        paramSR.L = length(x(:,1));
        paramSR.lambda = 0.01;
        alpha = mexLasso(x, Fii, paramSR);
        alpha = full(alpha);
        alpha_p(:,:,i) = alpha;     % kept so the winner's coefficients can be passed to updateDic

        % the reconstruction error of each patch
        recon = sum((x - Fii*alpha).^2);

        % occlusion parameter
        thr = 0.04;

        % the occlusion indicator
        thr_lable = recon>=thr;
        temp = ones(Fisize, prod(patchnum));
        temp(:, thr_lable) = 0;     % zero the columns of patches flagged as occluded

        % the weighted histogram for the candidate
        p = temp.*abs(alpha);
        p = reshape(p, 1, numel(p));
        p = p./sum(p);              % normalise to unit sum

        % the weighted histogram for the template
        temp_qq = temp_q;
        temp_qq(:, thr_lable) = 0;  % same occlusion mask as for the candidate
        q = temp_qq.*abs(alpha_qq);
        q = reshape(q, 1, numel(q));
        q = q./sum(q);

        % the similarity between the candidate and the template
        lambda_thr = 0.00003;
        a = sum(min([p; q]));               % histogram intersection
        b(i) = lambda_thr*sum(thr_lable);   % term proportional to the number of flagged patches
        sim(i) = a + b(i);
    end

    %% Collaborative Model
    % Scores from SDC and SGM are multiplied here and the most likely
    % candidate is chosen.
    likelihood = con.*sim;
    [v_max,id_max] = max(likelihood);


    param.est = affparam2mat(param.param(:,id_max));
    result(f,:) = param.est';
    % display the tracking result in the first frame and in every 5th frame


    if (mod(f,5)==0|| f==1)
        displayResult_sf;   % external script; reads its inputs from the workspace
        snapnow;
    end
    %% Update Scheme
    % Dictionary $D$ remains the same for the whole sequence. Templates are
    % updated every 5 frames as a convex linear combination of old and new
    % ones. In this script updateDic returns the new negative templates
    % (A_neg) and the new template coefficients (alpha_qq). Its last
    % argument is the number of flagged patches of the winning candidate
    % divided by the total number of patches.
    upRate = 5;
    if rem(f, upRate)==0
        [A_neg alpha_qq] = updateDic(dataPath, sz, opt, param, num_n, forMat, p0, f, abs(alpha_q), abs(alpha_p(:,:,id_max)), (b(id_max)/lambda_thr)/prod(patchnum));
    end
end

%% Save and Display Tracking Results

% Store the per-frame affine parameters in '<title>.mat'.
% NOTE(review): title is expected to be a variable set by trackparam (it is
% never assigned in this script); it shadows MATLAB's built-in title
% function -- confirm.
save([ title '.mat'], 'result');
% displayResult; % display the tracking results in the whole image sequence
	%*************************************************************
	% Copyright (C) Wei Zhong.
	% All rights reserved.
	% Date: 05/2012
	% Tutorial on "Robust Object Tracking via Sparsity-based Collaborative Model"
	% _Ivan Bogun April 9, 2014_
	% Adapted from the demo.m by Wei Zhong

	%% References and links to source codes etc.
	% Main reference:
	%
	% [1] Zhong, Wei, Huchuan Lu, and Ming-Hsuan Yang.
	% "Robust object tracking via sparsity-based collaborative model."
	% Computer Vision and Pattern Recognition (CVPR), 2012 IEEE Conference on. IEEE, 2012.
	%
	% Original project page:
	% <http://ice.dlut.edu.cn/lu/Project/cvpr12_scm/cvpr12_scm.htm>
	%
	% Original source code:
	% <http://ice.dlut.edu.cn/lu/Project/cvpr12_scm/cvpr12_scm.files/cvpr12_wei_code.zip>
	%
	% File 'tight_subplot':
	% <http://www.mathworks.com/matlabcentral/fileexchange/27991-tight-subplot>
	%
	% File used to generate this tutorial:
	%


	%% Initialization of the parameters
	% Clear the command window, put the affine-sampling helpers on the path and
	% build the initial affine state of the target from the values provided by
	% trackparam.
	clc;
	%clear all;
	addpath('./Affine Sample Functions');
	% trackparam is run as a script. It is expected to define p, opt.numsample,
	% dataPath, forMat and title: all of them are used below but never assigned
	% in this script.
	trackparam; % initial position and affine parameters
	opt.tmplsize = [32 32]; % template size
	sz = opt.tmplsize;
	n_sample = opt.numsample; % number of candidates scored per frame (bounds the per-sample loop)

	% Six-element initial state built from p and the template width sz(2).
	% NOTE(review): presumably p = [x, y, width, height, angle], which would
	% make this [x, y, scale, angle, aspect ratio, skew] -- confirm in trackparam.
	param0 = [p(1), p(2), p(3)/sz(2), p(5), p(4)/p(3), 0]; %
	p0 = p(4)/p(3); % same ratio as param0(5); passed to affineTrainG and updateDic

	% sample parameters
	param0 = affparam2mat(param0); % converted form is what affineTrainL receives later
	param = [];
	param.est = param0'; % current state estimate, stored transposed

	%% Templates generation
	% Get templates from the image based on sampled transformations. Positive
	% templates are the ones sampled close to the original bounding box, while
	% negative templates are sampled within a specified distance of it so that
	% their overlap with the target is not significant.

	% obtain positive and negative templates for the SDC
	num_p = 50; %number of positive
	num_n = 200; %number of negative

	% A_poso / A_nego are indexed below with one template per column.
	[A_poso A_nego] = affineTrainG(dataPath, sz, opt, param, num_p, num_n, forMat, p0);

	%% Plot positive and negative templates
	% Draw every positive and every negative template in its own tile of a
	% tight_subplot grid, one figure per template set.
	imRows=10;          % tiles per grid row (handed to tight_subplot as its column count)
	isPublishMode=1;    % set to 0 to skip all template/patch figures
	if (isPublishMode)
	    fprintf('Left figure: sampled positive templates \n');
	    figure;
	    % ceil keeps the row count an integer when the number of templates is
	    % not a multiple of imRows.
	    ha = tight_subplot(ceil(size(A_poso,2)/imRows), imRows,[.01 .01],[.01 .01],[.01 .01]);
	    for i=1:size(A_poso,2)
	        axes(ha(i));
	        % each column is one vectorised template; reshape with the template
	        % size instead of a hard-coded 32x32 (assumes sz(1)*sz(2) rows)
	        imagesc(reshape(A_poso(:,i),sz(1),sz(2)));
	        axis off;
	    end
	    colormap gray;
	    snapnow;

	    imRows=imRows*2;    % the negative set is shown with twice as many tiles per row
	    fprintf('Right figure: sampled negative templates \n');
	    figure;
	    ha = tight_subplot(ceil(size(A_nego,2)/imRows), imRows,[.01 .01],[.01 .01],[.01 .01]);
	    for i=1:size(A_nego,2)
	        axes(ha(i));
	        imagesc(reshape(A_nego(:,i),sz(1),sz(2)));
	        axis off;
	    end
	    colormap gray;
	    snapnow;
	end
	%% Calculate Dictionary
	% Using a sliding window, the template is divided into overlapping patches.
	% These patches are clustered using k-means to create a bag-of-words
	% representation of the tracked object.
	% Working copies of the templates: A_neg is replaced later by updateDic,
	% while A_poso / A_nego are not reassigned in this script.
	A_pos = A_poso;
	A_neg = A_nego;

	% obtain the dictionary for the SGM
	patchsize = [6 6];
	% number of window positions per dimension: from patchsize/2 up to
	% sz - patchsize/2 in steps of 2
	patchnum(1) = length(patchsize(1)/2 : 2: (sz(1)-patchsize(1)/2));
	patchnum(2) = length(patchsize(2)/2 : 2: (sz(2)-patchsize(2)/2));

	% size of the dictionary (e.g. k in k-means)
	Fisize = 50;
	% Fio: dictionary, patcho: template patches; both are plotted below with
	% one patchsize(1)-by-patchsize(2) patch per column.
	[Fio patcho] = affineTrainL(dataPath, param0, opt, patchsize, patchnum, Fisize, forMat);
	Fi = Fio;

	% Visualise the template patches and the learned dictionary (only when the
	% figures are requested via isPublishMode).
	if (isPublishMode)
	fprintf('Left figure: All plot patches recovered using sliding window \n');
	imRows=patchnum(1); % used as the number of tiles per grid row
	figure;
	% grid with patchnum(2) rows and patchnum(1) columns, i.e. prod(patchnum)
	% tiles -- assumes size(patcho,2) does not exceed that; TODO confirm
	ha = tight_subplot(patchnum(2), imRows,[.01 .01],[.01 .01],[.01 .01]);
	for i=1:size(patcho,2)
	axes(ha(i));
	%subplot(size(A_nego_plot,2)/imRows, imRows,i);
	imagesc(reshape(patcho(:,i),patchsize(1),patchsize(2)));
	axis off;
	end
	colormap gray;

	imRows=5; % five dictionary atoms per grid row
	fprintf('Right figure:Patches codebook \n');
	figure;
	ha = tight_subplot(Fisize/imRows, imRows,[.01 .01],[.01 .01],[.01 .01]);
	for i=1:size(Fio,2)
	axes(ha(i));
	%subplot(size(A_nego_plot,2)/imRows, imRows,i);
	imagesc(reshape(Fio(:,i),patchsize(1),patchsize(2)));
	axis off;
	axis equal;
	end
	colormap gray;

	end
	% Sequence information: the third entry of datainfo.txt is used as the
	% number of frames.
	temp = importdata([dataPath 'datainfo.txt']);
	num = temp(3);
	% Options shared by the sparse-coding calls; L and lambda are filled in
	% right before each call.
	% NOTE(review): lambda2 and mode look like SPAMS mexLasso options -- confirm.
	paramSR.lambda2 = 0;
	paramSR.mode = 2;
	% alpha_p is written once per candidate sample (alpha_p(:,:,i), i = 1:n_sample)
	% inside the tracking loop, so its third dimension is the sample count, not
	% the frame count.
	alpha_p = zeros(Fisize, prod(patchnum), n_sample);
	result = zeros(num, 6);   % one row of affine parameters per frame of the sequence

	%% Tracking loop

	fprintf('Tracking display \n');
	% The tutorial tracks only the first 11 frames, but never more frames than
	% the sequence provides. Rows of result beyond the tracked frames stay zero.
	num = min(num, 11);
	% Main tracking loop: for every frame, sample candidates, score them with
	% the discriminative (SDC) and generative (SGM) models, keep the best
	% candidate as the new state and periodically refresh the templates.
	for f = 1:num

	    % read the image and convert it to grayscale
	    img_color = imread([dataPath int2str(f) forMat]);   % frame file name is [dataPath f forMat]
	    if size(img_color,3)==3
	        img = rgb2gray(img_color);
	    else
	        img = img_color;
	    end

	    %% Sparsity-based Discriminative Classifier (SDC)
	    gamma = 0.4;    % scale of the exponent in the SDC confidence below

	    % Particle filtering sampling of the affine transformations
	    [wimgs Y param] = affineSample(double(img), sz, opt, param);

	    % normalization
	    YY = normVector(Y);
	    AA_pos = normVector(A_pos);
	    AA_neg = normVector(A_neg);


	    %% Feature selection step
	    % This step reduces the redundant set of features sampled in the
	    % previous step. Discriminative features are selected by solving the
	    % LASSO problem: $$\min_s || A s-p||_2^2+\lambda||s||_1$$
	    P = selectFeature(AA_pos, AA_neg, paramSR);

	    % project the original feature space to the selected feature space
	    YYY = P'*YY;
	    AAA_pos = P'*AA_pos;
	    AAA_neg = P'*AA_neg;

	    paramSR.L = length(YYY(:,1));   % number of rows (selected features)
	    paramSR.lambda = 0.01;

	    %% Representation in the new feature space
	    % The samples generated in the previous step are coded over the
	    % projected positive and negative templates. The coding is done via
	    % LASSO.
	    beta = mexLasso(YYY, [AAA_pos AAA_neg], paramSR);
	    beta = full(beta);

	    %% SDC confidence measure
	    % Calculate the reconstruction error of each projected candidate using
	    % only the positive templates and only the negative templates. Set
	    % $\epsilon_f(\beta)=||x'-A_{+}' \beta_{+}||_2^2$ as the foreground
	    % (positive) error and $\epsilon_b(\beta)=||x'-A_{-}' \beta_{-}||_2^2$
	    % as the background (negative) error. The combined score is
	    % $H_c=\exp(-(\epsilon_f-\epsilon_b)/\sigma)$, with $\sigma$ = gamma in
	    % the code. This gives high scores to candidates with a LOW positive
	    % reconstruction error and a HIGH negative one.
	    rec_f = sum((YYY - AAA_pos*beta(1:size(AAA_pos,2),:)).^2);
	    rec_b = sum((YYY - AAA_neg*beta(size(AAA_pos,2)+1:end,:)).^2);
	    con = exp(-rec_f/gamma)./exp(-rec_b/gamma);

	    %% Sparsity-based Generative Model (SGM)
	    yita = 0.01;    % not referenced again in this script

	    % obtain M patches for each candidate
	    patch = affinePatch(wimgs, patchsize, patchnum);

	    % normalization
	    Fii = normVector(Fi);

	    % the template histogram in the first frame and before occlusion handling
	    if f==1
	        xo = normVector(patcho);
	        paramSR.L = length(xo(:,1));
	        paramSR.lambda = 0.01;
	        alpha_q = mexLasso(xo, Fii, paramSR);
	        alpha_q = full(alpha_q);
	        alpha_qq = alpha_q;     % alpha_qq is the copy that updateDic replaces later
	    end

	    temp_q = ones(Fisize, prod(patchnum));  % all-ones mask for the template histogram
	    sim = zeros(1,n_sample);                % SGM similarity per candidate
	    b = zeros(1,n_sample);                  % occlusion term per candidate

	    %% Histogram generation
	    % For each sample a set of patches was already created. Each set of
	    % patches is sparsely coded over the predefined dictionary, $D$. Once
	    % the representation is found, the reconstruction error of every patch
	    % is calculated. A high error indicates that the patch is likely to be
	    % occluded; occlusion is handled by thresholding the reconstruction
	    % error. The similarity between a candidate and the template is
	    % calculated using the histogram intersection kernel: $L_c=\sum_j
	    % \min(\phi_c^j,\phi^j)$.

	    for i = 1:n_sample
	        x = normVector(patch(:,:,i));
	        paramSR.L = length(x(:,1));
	        paramSR.lambda = 0.01;
	        alpha = mexLasso(x, Fii, paramSR);
	        alpha = full(alpha);
	        alpha_p(:,:,i) = alpha;     % kept so the winner's coefficients can be passed to updateDic

	        % the reconstruction error of each patch
	        recon = sum((x - Fii*alpha).^2);

	        % occlusion parameter
	        thr = 0.04;

	        % the occlusion indicator
	        thr_lable = recon>=thr;
	        temp = ones(Fisize, prod(patchnum));
	        temp(:, thr_lable) = 0;     % zero the columns of patches flagged as occluded

	        % the weighted histogram for the candidate
	        p = temp.*abs(alpha);
	        p = reshape(p, 1, numel(p));
	        p = p./sum(p);              % normalise to unit sum

	        % the weighted histogram for the template
	        temp_qq = temp_q;
	        temp_qq(:, thr_lable) = 0;  % same occlusion mask as for the candidate
	        q = temp_qq.*abs(alpha_qq);
	        q = reshape(q, 1, numel(q));
	        q = q./sum(q);

	        % the similarity between the candidate and the template
	        lambda_thr = 0.00003;
	        a = sum(min([p; q]));               % histogram intersection
	        b(i) = lambda_thr*sum(thr_lable);   % term proportional to the number of flagged patches
	        sim(i) = a + b(i);
	    end

	    %% Collaborative Model
	    % Scores from SDC and SGM are multiplied here and the most likely
	    % candidate is chosen.
	    likelihood = con.*sim;
	    [v_max,id_max] = max(likelihood);

	    param.est = affparam2mat(param.param(:,id_max));
	    result(f,:) = param.est';

	    % display the tracking result in the first frame and in every 5th frame
	    % (the condition must use MATLAB's short-circuit OR "||"; the escaped
	    % form "\|\|" is not valid MATLAB)
	    if (mod(f,5)==0 || f==1)
	        displayResult_sf;   % external script; reads its inputs from the workspace
	        snapnow;
	    end

	    %% Update Scheme
	    % Dictionary $D$ remains the same for the whole sequence. Templates are
	    % updated every 5 frames as a convex linear combination of old and new
	    % ones. In this script updateDic returns the new negative templates
	    % (A_neg) and the new template coefficients (alpha_qq). Its last
	    % argument is the number of flagged patches of the winning candidate
	    % divided by the total number of patches.
	    upRate = 5;
	    if rem(f, upRate)==0
	        [A_neg alpha_qq] = updateDic(dataPath, sz, opt, param, num_n, forMat, p0, f, abs(alpha_q), abs(alpha_p(:,:,id_max)), (b(id_max)/lambda_thr)/prod(patchnum));
	    end
	end

	%% Save and Display Tracking Results

	% Store the per-frame affine parameters in '<title>.mat'.
	% NOTE(review): title is expected to be a variable set by trackparam (it is
	% never assigned in this script); it shadows MATLAB's built-in title
	% function -- confirm.
	save([ title '.mat'], 'result');
	% displayResult; % display the tracking results in the whole image sequence