Created
April 9, 2014 14:55
-
-
Save ibogun/10279761 to your computer and use it in GitHub Desktop.
Tutorial on "Robust Object Tracking via Sparsity-based Collaborative Model"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%************************************************************* | |
% Copyright (C) Wei Zhong. | |
% All rights reserved. | |
% Date: 05/2012 | |
% Tutorial on "Robust Object Tracking via Sparsity-based Collaborative Model" | |
% _Ivan Bogun April 9, 2014_ | |
% Adapted from the demo.m by Wei Zhong | |
%% References and links to source codes etc. | |
% Main reference: | |
% | |
% [1] Zhong, Wei, Huchuan Lu, and Ming-Hsuan Yang. | |
% "Robust object tracking via sparsity-based collaborative model." | |
% Computer Vision and Pattern Recognition (CVPR), 2012 IEEE Conference on. IEEE, 2012. | |
% | |
% Original project page: | |
% <http://ice.dlut.edu.cn/lu/Project/cvpr12_scm/cvpr12_scm.htm> | |
% | |
% Original source code: | |
% <http://ice.dlut.edu.cn/lu/Project/cvpr12_scm/cvpr12_scm.files/cvpr12_wei_code.zip> | |
% | |
% File 'tight_subplot': | |
% <http://www.mathworks.com/matlabcentral/fileexchange/27991-tight-subplot> | |
% | |
% File used to generate this tutorial: | |
% | |
%% Initialization of the parameters
% Clears the command window, puts the affine-sampling helpers on the path
% and runs the trackparam script. The statements below read p, opt,
% dataPath and forMat from the workspace, so trackparam is expected to
% define them.
clc;
%clear all;
addpath('./Affine Sample Functions');
trackparam;                         % initial position and affine parameters

% Fixed size every sampled template is warped to.
opt.tmplsize = [32 32];             % template size
sz = opt.tmplsize;

% Number of candidates evaluated per frame (bound of the per-sample loop).
n_sample = opt.numsample;

% Ratio p(4)/p(3) of the initial box.
% NOTE(review): presumably height/width -- confirm against trackparam.
p0 = p(4)/p(3);

% Initial state vector built from p and converted by affparam2mat; the
% fifth entry is the same ratio that is stored in p0.
param0 = affparam2mat([p(1), p(2), p(3)/sz(2), p(5), p0, 0]);

% Tracker state: .est holds the current estimate as a row vector.
param = struct();
param.est = param0';
%% Templates generation
% Templates are cut from the image using sampled transformations.
% Positive templates are sampled close to the original bounding box;
% negative templates are sampled within a specified distance of it so
% that their overlap with the box is not significant.
%
% These two template sets feed the Sparsity-based Discriminative
% Classifier (SDC).
num_p = 50;     % number of positive templates
num_n = 200;    % number of negative templates
[A_poso, A_nego] = affineTrainG(dataPath, sz, opt, param, num_p, num_n, forMat, p0);
%% Plot positive and negative templates
% Shows every positive and every negative template (one column of A_poso /
% A_nego each) as a small grayscale image on a tight grid of axes.
%
% imRows is the number of grid COLUMNS passed to tight_subplot (10 for the
% positive set, 20 for the negative set). The number of grid rows is
% rounded up so that a template count that is not a multiple of imRows
% still gets enough axes; any axes left over are hidden.
imRows=10;
isPublishMode=1;
if (isPublishMode)
    fprintf('Left figure: sampled positive templates \n');
    figure;
    ha = tight_subplot(ceil(size(A_poso,2)/imRows), imRows,[.01 .01],[.01 .01],[.01 .01]);
    for i=1:size(A_poso,2)
        axes(ha(i));
        % Each column is a vectorised template of size sz (opt.tmplsize).
        imagesc(reshape(A_poso(:,i),sz(1),sz(2)));
        axis off;
    end
    % Hide grid cells that received no template.
    for k=size(A_poso,2)+1:numel(ha)
        axis(ha(k),'off');
    end
    colormap gray;
    snapnow;    % capture this figure before the next one is opened

    imRows=imRows*2;
    fprintf('Right figure: sampled negative templates \n');
    figure;
    ha = tight_subplot(ceil(size(A_nego,2)/imRows), imRows,[.01 .01],[.01 .01],[.01 .01]);
    for i=1:size(A_nego,2)
        axes(ha(i));
        imagesc(reshape(A_nego(:,i),sz(1),sz(2)));
        axis off;
    end
    for k=size(A_nego,2)+1:numel(ha)
        axis(ha(k),'off');
    end
    colormap gray;
    snapnow;
end
%% Calculate Dictionary
% A sliding window divides the template into overlapping patches. The
% patches are clustered with k-means to obtain a bag-of-words style
% dictionary of the tracked object, used by the Sparsity-based Generative
% Model (SGM).

% Working copies of the templates; A_neg is replaced during tracking while
% A_poso / A_nego keep the initial sets.
A_pos = A_poso;
A_neg = A_nego;

% Patch geometry: 6x6 windows. Along each dimension the window centre runs
% from half a patch inside the border to half a patch before the opposite
% border in steps of 2, and patchnum counts those positions.
patchsize = [6 6];
patchnum(1) = numel(patchsize(1)/2 : 2 : sz(1)-patchsize(1)/2);
patchnum(2) = numel(patchsize(2)/2 : 2 : sz(2)-patchsize(2)/2);

% Size of the dictionary (e.g. k in k-means).
Fisize = 50;

% Fio: dictionary (one atom per column); patcho: patches of the initial
% template (one patch per column).
[Fio, patcho] = affineTrainL(dataPath, param0, opt, patchsize, patchnum, Fisize, forMat);
Fi = Fio;
% Visualise the sliding-window patches of the initial template and the
% learned patch codebook. Only runs when isPublishMode is set.
if (isPublishMode)
    fprintf('Left figure: All plot patches recovered using sliding window \n');
    % One axes per sliding-window position: patchnum(2) rows by
    % patchnum(1) columns.
    imRows=patchnum(1);
    figure;
    ha = tight_subplot(patchnum(2), imRows,[.01 .01],[.01 .01],[.01 .01]);
    for i=1:size(patcho,2)
        axes(ha(i));
        imagesc(reshape(patcho(:,i),patchsize(1),patchsize(2)));
        axis off;
    end
    colormap gray;

    % Codebook: 5 atoms per row. The row count is rounded up so that a
    % dictionary size that is not a multiple of 5 still gets enough axes.
    imRows=5;
    fprintf('Right figure:Patches codebook \n');
    figure;
    ha = tight_subplot(ceil(Fisize/imRows), imRows,[.01 .01],[.01 .01],[.01 .01]);
    for i=1:size(Fio,2)
        axes(ha(i));
        imagesc(reshape(Fio(:,i),patchsize(1),patchsize(2)));
        axis off;
        axis equal;
    end
    % Hide grid cells that received no codebook atom.
    for k=size(Fio,2)+1:numel(ha)
        axis(ha(k),'off');
    end
    colormap gray;
end
% Sequence metadata: the third entry of datainfo.txt is used as the number
% of frames.
temp = importdata([dataPath 'datainfo.txt']);
num = temp(3);

% Sparse-coding options handed to selectFeature and mexLasso.
% NOTE(review): in SPAMS, mode = 2 with lambda2 = 0 is the plain
% l1-penalised least-squares (LASSO) problem -- confirm against the SPAMS
% version shipped with the tracker.
paramSR.lambda2 = 0;
paramSR.mode = 2;

% Sparse coefficients of every patch for every candidate of the current
% frame. The third dimension is indexed by the candidate (sample) index,
% so it is sized by n_sample rather than by the number of frames.
alpha_p = zeros(Fisize, prod(patchnum), n_sample);

% One row of 6 state parameters per frame.
result = zeros(num, 6);
%% Tracking loop
fprintf('Tracking display \n');
% The tutorial only tracks the first 11 frames; never run past the end of
% a shorter sequence.
num = min(num, 11);

% Constants of the model (loop-invariant, so defined once).
gamma      = 0.4;       % scale of the SDC confidence
yita       = 0.01;      % NOTE(review): not referenced in this script; kept
                        % because helper scripts share this workspace
thr        = 0.04;      % occlusion threshold on the patch reconstruction error
lambda_thr = 0.00003;   % weight of the occluded-patch count in the similarity
upRate     = 5;         % template update period, in frames

for f = 1:num
    % read the image and convert it to grayscale
    img_color = imread([dataPath int2str(f) forMat]);
    if size(img_color,3)==3
        img = rgb2gray(img_color);
    else
        img = img_color;
    end

    %% Sparsity-based Discriminative Classifier (SDC)
    % Particle filtering sampling of the affine transformations
    [wimgs, Y, param] = affineSample(double(img), sz, opt, param);
    % normalization
    YY = normVector(Y);
    AA_pos = normVector(A_pos);
    AA_neg = normVector(A_neg);

    %% Feature selection step
    % This step reduces the redundant set of features sampled in the
    % previous step. Discriminative features are selected by solving the
    % LASSO problem: $$\min_s || A s-p||_2^2+\lambda||s||_1$$
    P = selectFeature(AA_pos, AA_neg, paramSR);
    % project the original feature space to the selected feature space
    YYY = P'*YY;
    AAA_pos = P'*AA_pos;
    AAA_neg = P'*AA_neg;
    paramSR.L = length(YYY(:,1));
    paramSR.lambda = 0.01;

    %% Representation in the new feature space
    % Each projected candidate is sparsely coded over the projected
    % positive and negative templates. The coding is done via LASSO.
    beta = mexLasso(YYY, [AAA_pos AAA_neg], paramSR);
    beta = full(beta);

    %% SDC confidence measure
    % Reconstruction error of every projected candidate $x'$ using only the
    % positive templates, $\epsilon_f(\beta)=||x'-A_{+}' \beta_{+}||_2^2$,
    % and using only the negative templates,
    % $\epsilon_b(\beta)=||x'-A_{-}' \beta_{-}||_2^2$.
    % The combined score is
    % $H_c=\exp(-(\epsilon_f-\epsilon_b)/\sigma)$ ($\sigma$ is gamma in
    % the code). It is high for candidates with a LOW foreground (positive)
    % reconstruction error and a HIGH background (negative) one.
    rec_f = sum((YYY - AAA_pos*beta(1:size(AAA_pos,2),:)).^2);
    rec_b = sum((YYY - AAA_neg*beta(size(AAA_pos,2)+1:end,:)).^2);
    con = exp(-rec_f/gamma)./exp(-rec_b/gamma);

    %% Sparsity-based Generative Model (SGM)
    % obtain M patches for each candidate
    patch = affinePatch(wimgs, patchsize, patchnum);
    % normalization
    Fii = normVector(Fi);
    % the template histogram in the first frame and before occlusion handling
    if f==1
        xo = normVector(patcho);
        paramSR.L = length(xo(:,1));
        paramSR.lambda = 0.01;
        alpha_q = mexLasso(xo, Fii, paramSR);
        alpha_q = full(alpha_q);
        alpha_qq = alpha_q;
    end
    temp_q = ones(Fisize, prod(patchnum));
    sim = zeros(1,n_sample);
    b = zeros(1,n_sample);

    %% Histogram generation
    % For each candidate its patches are sparsely coded over the fixed
    % dictionary $D$ and the reconstruction error of every patch is
    % computed. A patch whose error reaches the threshold thr is treated
    % as occluded and is masked out of both the candidate and the template
    % histogram. The similarity between candidate and template is the
    % histogram intersection kernel: $L_c=\sum_j \min(\phi_c^j,\phi^j)$,
    % plus a small term proportional to the number of occluded patches.
    for i = 1:n_sample
        x = normVector(patch(:,:,i));
        paramSR.L = length(x(:,1));
        paramSR.lambda = 0.01;
        % the sparse coefficient vectors for M patches
        alpha = mexLasso(x, Fii, paramSR);
        alpha = full(alpha);
        alpha_p(:,:,i) = alpha;
        % the reconstruction error of each patch
        recon = sum((x - Fii*alpha).^2);
        % the occlusion indicator (one logical per patch)
        thr_lable = recon>=thr;
        temp = ones(Fisize, prod(patchnum));
        temp(:, thr_lable) = 0;
        % the weighted histogram for the candidate
        % (note: this reuses the name p set up by trackparam)
        p = temp.*abs(alpha);
        p = reshape(p, 1, numel(p));
        % Normalise only a non-empty histogram: when every patch is masked
        % the sum is 0 and dividing would turn the similarity into NaN.
        if sum(p) > 0
            p = p./sum(p);
        end
        % the weighted histogram for the template, with the same mask
        temp_qq = temp_q;
        temp_qq(:, thr_lable) = 0;
        q = temp_qq.*abs(alpha_qq);
        q = reshape(q, 1, numel(q));
        if sum(q) > 0
            q = q./sum(q);
        end
        % the similarity between the candidate and the template
        a = sum(min([p; q]));
        b(i) = lambda_thr*sum(thr_lable);
        sim(i) = a + b(i);
    end

    %% Collaborative Model
    % Scores from SDC and SGM are multiplied here and the most likely
    % candidate is chosen.
    likelihood = con.*sim;
    [v_max,id_max] = max(likelihood);
    param.est = affparam2mat(param.param(:,id_max));
    result(f,:) = param.est';
    % display the tracking result on the first frame and every 5th frame
    if (mod(f,5)==0|| f==1)
        displayResult_sf;
        snapnow;
    end

    %% Update Scheme
    % Dictionary $D$ remains the same for the whole sequence. Every upRate
    % frames updateDic returns new negative templates A_neg and a new
    % template coefficient matrix alpha_qq. It receives the initial
    % coefficients alpha_q, the coefficients of the best candidate and that
    % candidate's fraction of occluded patches.
    % NOTE(review): per the tutorial text the new template histogram is a
    % convex combination of the old and new ones -- confirm in updateDic.
    if rem(f, upRate)==0
        [A_neg, alpha_qq] = updateDic(dataPath, sz, opt, param, num_n, forMat, p0, f, abs(alpha_q), abs(alpha_p(:,:,id_max)), (b(id_max)/lambda_thr)/prod(patchnum));
    end
end
%% Save and Display Tracking Results
% Writes the per-frame state estimates (variable result) to "<title>.mat".
% NOTE(review): title is presumably a character variable set by trackparam;
% if it is not defined, the name resolves to MATLAB's built-in title
% function -- confirm.
resultFile = [ title '.mat'];
save(resultFile, 'result');
% displayResult; % display the tracking results in the whole image sequence
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment