Skip to content

Instantly share code, notes, and snippets.

@ibogun
Created April 9, 2014 14:55
Show Gist options
  • Save ibogun/10279761 to your computer and use it in GitHub Desktop.
Save ibogun/10279761 to your computer and use it in GitHub Desktop.
Tutorial on "Robust Object Tracking via Sparsity-based Collaborative Model"
%*************************************************************
% Copyright (C) Wei Zhong.
% All rights reserved.
% Date: 05/2012
% Tutorial on "Robust Object Tracking via Sparsity-based Collaborative Model"
% _Ivan Bogun April 9, 2014_
% Adapted from the demo.m by Wei Zhong
%% References and links to source codes etc.
% Main reference:
%
% [1] Zhong, Wei, Huchuan Lu, and Ming-Hsuan Yang.
% "Robust object tracking via sparsity-based collaborative model."
% Computer Vision and Pattern Recognition (CVPR), 2012 IEEE Conference on. IEEE, 2012.
%
% Original project page:
% <http://ice.dlut.edu.cn/lu/Project/cvpr12_scm/cvpr12_scm.htm>
%
% Original source code:
% <http://ice.dlut.edu.cn/lu/Project/cvpr12_scm/cvpr12_scm.files/cvpr12_wei_code.zip>
%
% File 'tight_subplot':
% <http://www.mathworks.com/matlabcentral/fileexchange/27991-tight-subplot>
%
% File used to generate this tutorial:
%
%% Initialization of the parameters
clc;
%clear all;
% Put the affine-sampling helpers on the path and run the trackparam script.
% The code below reads p and opt.numsample right after it, so trackparam is
% expected to define them (presumably also dataPath, forMat and title, which
% are used further down -- verify against trackparam.m).
addpath('./Affine Sample Functions');
trackparam; % initial position and affine parameters
sz = [32 32]; % template size
opt.tmplsize = sz;
n_sample = opt.numsample;
% ratio p(4)/p(3); kept on its own because it is also handed to
% affineTrainG and updateDic later in the script
p0 = p(4)/p(3);
% sample parameters: build the 6-element parameter vector and convert it
% with affparam2mat
param0 = affparam2mat([p(1), p(2), p(3)/sz(2), p(5), p0, 0]);
% tracker state; est holds the transposed output of affparam2mat
param = struct('est', param0');
%% Templates generation
% Templates are cut from the image using sampled transformations. Positive
% templates are the ones sampled close to the original bounding box, while
% negative templates are sampled at a specified distance from it so that
% their overlap with the object is not significant.
% obtain positive and negative templates for the SDC
num_p = 50;  % how many positive templates to draw
num_n = 200; % how many negative templates to draw
[A_poso, A_nego] = affineTrainG(dataPath, sz, opt, param, num_p, num_n, forMat, p0);
%% Plot positive and negative templates
% The following code will plot all positive and negative templates.
imRows=10; % tiles per row; passed to tight_subplot as the number of columns
isPublishMode=1; % also checked by the dictionary plots further down
if (isPublishMode)
    % --- positive templates: one tile per column of A_poso ---
    fprintf('Left figure: sampled positive templates \n');
    figure;
    nPos = size(A_poso,2);
    % ceil keeps the grid valid when nPos is not a multiple of imRows
    ha = tight_subplot(ceil(nPos/imRows), imRows,[.01 .01],[.01 .01],[.01 .01]);
    for i=1:nPos
        axes(ha(i));
        % each column is one vectorised template of size sz (opt.tmplsize)
        imagesc(reshape(A_poso(:,i),sz(1),sz(2)));
        axis off;
    end
    % hide grid cells that received no template (no-op when the grid is full)
    set(ha(nPos+1:end),'Visible','off');
    colormap gray;
    snapnow;
    % --- negative templates: twice as many tiles per row ---
    imRows=imRows*2;
    fprintf('Right figure: sampled negative templates \n');
    figure;
    nNeg = size(A_nego,2);
    ha = tight_subplot(ceil(nNeg/imRows), imRows,[.01 .01],[.01 .01],[.01 .01]);
    for i=1:nNeg
        axes(ha(i));
        imagesc(reshape(A_nego(:,i),sz(1),sz(2)));
        axis off;
    end
    set(ha(nNeg+1:end),'Visible','off');
    colormap gray;
    snapnow;
end
%% Calculate Dictionary
% A sliding window divides the template into overlapping patches. These
% patches are clustered using k-means to create a bag-of-words
% representation of the tracked object.
% working copies of the SDC templates (A_neg is replaced during tracking)
A_neg = A_nego;
A_pos = A_poso;
% obtain the dictionary for the SGM
patchsize = [6 6];
% number of window positions per dimension: centres run from half a patch
% inside one border to half a patch inside the other, in steps of 2
halfPatch = patchsize/2;
patchnum(1) = numel(halfPatch(1) : 2 : (sz(1)-halfPatch(1)));
patchnum(2) = numel(halfPatch(2) : 2 : (sz(2)-halfPatch(2)));
% size of the dictionary (e.g. k in k-means)
Fisize = 50;
[Fio, patcho] = affineTrainL(dataPath, param0, opt, patchsize, patchnum, Fisize, forMat);
% Fi is the dictionary the tracking loop uses
Fi = Fio;
if (isPublishMode)
    % --- figure 1: every patch returned by affineTrainL, one tile per
    % column of patcho, on a patchnum(2)-by-patchnum(1) grid ---
    fprintf('Left figure: All plot patches recovered using sliding window \n');
    imRows=patchnum(1);
    figure;
    ha = tight_subplot(patchnum(2), imRows,[.01 .01],[.01 .01],[.01 .01]);
    for i=1:size(patcho,2)
        axes(ha(i));
        imagesc(reshape(patcho(:,i),patchsize(1),patchsize(2)));
        axis off;
    end
    colormap gray;
    % --- figure 2: the codebook, one tile per column of Fio ---
    imRows=5;
    fprintf('Right figure:Patches codebook \n');
    figure;
    % size the grid from the number of atoms actually drawn; ceil keeps it
    % valid when that number is not a multiple of imRows
    nAtoms = size(Fio,2);
    ha = tight_subplot(ceil(nAtoms/imRows), imRows,[.01 .01],[.01 .01],[.01 .01]);
    for i=1:nAtoms
        axes(ha(i));
        imagesc(reshape(Fio(:,i),patchsize(1),patchsize(2)));
        axis off;
        axis equal;
    end
    % hide grid cells that received no atom (no-op when the grid is full)
    set(ha(nAtoms+1:end),'Visible','off');
    colormap gray;
end
% Sequence information: the third entry of datainfo.txt is used as the
% number of frames (it sizes result and bounds the tracking loop).
temp = importdata([dataPath 'datainfo.txt']);
num = temp(3);
% Options shared by selectFeature and the mexLasso calls; L and lambda are
% filled in right before each mexLasso call in the tracking loop.
paramSR.lambda2 = 0;
paramSR.mode = 2;
% Sparse codes of every candidate in the current frame. The third dimension
% is indexed by candidate (1:n_sample) in the tracking loop, so it is sized
% by n_sample rather than by the number of frames.
alpha_p = zeros(Fisize, prod(patchnum), n_sample);
% One row of 6 affine parameters per frame.
result = zeros(num, 6);
%% Tracking loop
fprintf('Tracking display \n');
% The tutorial only tracks the first 11 frames; cap at the sequence length
% so that shorter sequences do not fail in imread. Rows of result beyond
% num stay zero.
num = min(num, 11);
for f = 1:num
    % read the image and convert it to grayscale
    img_color = imread([dataPath int2str(f) forMat]);
    if size(img_color,3)==3
        img = rgb2gray(img_color);
    else
        img = img_color;
    end
    %% Sparsity-based Discriminative Classifier (SDC)
    gamma = 0.4;
    % Particle filtering sampling of the affine transformations
    [wimgs, Y, param] = affineSample(double(img), sz, opt, param);
    % normalization
    YY = normVector(Y);
    AA_pos = normVector(A_pos);
    AA_neg = normVector(A_neg);
    %% Feature selection step
    % This step is used to reduce the redundant set of features sampled on
    % the previous step. Discriminative features are selected by solving the
    % LASSO problem: $$\min_s || A s-p||_2^2+\lambda||s||_1$$
    % NOTE(review): from the second frame on, paramSR still carries the L
    % and lambda set by the patch coding of the previous frame -- confirm
    % that selectFeature sets its own values.
    P = selectFeature(AA_pos, AA_neg, paramSR);
    % project the original feature space to the selected feature space
    YYY = P'*YY;
    AAA_pos = P'*AA_pos;
    AAA_neg = P'*AA_neg;
    paramSR.L = length(YYY(:,1));
    paramSR.lambda = 0.01;
    %% Representation in the new feature space
    % The candidates generated on the previous step are sparsely coded over
    % the projected positive and negative templates. The coding is done
    % via LASSO.
    beta = mexLasso(YYY, [AAA_pos AAA_neg], paramSR);
    beta = full(beta);
    %% SDC confidence measure
    % Calculate the reconstruction error of each projected candidate on the
    % projected positive and on the projected negative templates. Set
    % $\epsilon_f=||x'-A_{+}' \beta_{+}||_2^2$ as the positive (foreground)
    % error and $\epsilon_b=||x'-A_{-}' \beta_{-}||_2^2$ as the negative
    % (background) one. The combined score is given by
    % $H_c=\exp\left(-\frac{\epsilon_f-\epsilon_b}{\sigma}\right)$
    % (sigma is gamma in the code). This gives high scores to candidates
    % which have a low positive reconstruction error and a high negative one.
    rec_f = sum((YYY - AAA_pos*beta(1:size(AAA_pos,2),:)).^2);
    rec_b = sum((YYY - AAA_neg*beta(size(AAA_pos,2)+1:end,:)).^2);
    con = exp(-rec_f/gamma)./exp(-rec_b/gamma);
    %% Sparsity-based Generative Model (SGM)
    yita = 0.01;
    % obtain M patches for each candidate
    patch = affinePatch(wimgs, patchsize, patchnum);
    % normalization
    Fii = normVector(Fi);
    % the template histogram in the first frame and before occlusion handling
    if f==1
        xo = normVector(patcho);
        paramSR.L = length(xo(:,1));
        paramSR.lambda = 0.01;
        alpha_q = mexLasso(xo, Fii, paramSR);
        alpha_q = full(alpha_q);
        alpha_qq = alpha_q;
    end
    temp_q = ones(Fisize, prod(patchnum));
    sim = zeros(1,n_sample);
    b = zeros(1,n_sample);
    % occlusion parameter (threshold on the per-patch reconstruction error)
    thr = 0.04;
    % weight of the occluded-patch count in the similarity; also needed by
    % the update step after the candidate loop
    lambda_thr = 0.00003;
    % the sparse coefficient vectors for M patches
    %% Histogram generation
    % For each sample a set of patches was already created. Each set of
    % patches is used to find a sparse representation over the predefined
    % dictionary, $D$. Once the representation is found, the reconstruction
    % error is calculated. This error provides an estimate of whether
    % occlusion is taking place: if the error is high, occlusion is likely.
    % Occlusion is handled as a threshold on the reconstruction error.
    % Similarity between the candidate and the template is calculated
    % using the histogram intersection kernel: $L_c=\sum_j
    % \min(\phi_c^j,\phi^j)$.
    for i = 1:n_sample
        x = normVector(patch(:,:,i));
        paramSR.L = length(x(:,1));
        paramSR.lambda = 0.01;
        alpha = mexLasso(x, Fii, paramSR);
        alpha = full(alpha);
        alpha_p(:,:,i) = alpha;
        % the reconstruction error of each patch
        recon = sum((x - Fii*alpha).^2);
        % the occlusion indicator
        thr_lable = recon>=thr;
        temp = ones(Fisize, prod(patchnum));
        temp(:, thr_lable) = 0;
        % the weighted histogram for the candidate
        % NOTE(review): if every patch is flagged as occluded, sum(p) is 0
        % and the normalisation below yields NaN for this candidate.
        p = temp.*abs(alpha);
        p = reshape(p, 1, numel(p));
        p = p./sum(p);
        % the weighted histogram for the template
        temp_qq = temp_q;
        temp_qq(:, thr_lable) = 0;
        q = temp_qq.*abs(alpha_qq);
        q = reshape(q, 1, numel(q));
        q = q./sum(q);
        % the similarity between the candidate and the template
        a = sum(min([p; q]));
        b(i) = lambda_thr*sum(thr_lable);
        sim(i) = a + b(i);
    end
    %% Collaborative Model
    % Scores from SDC and SGM are multiplied here and the most likely
    % candidate is chosen.
    likelihood = con.*sim;
    [v_max,id_max] = max(likelihood);
    param.est = affparam2mat(param.param(:,id_max));
    result(f,:) = param.est';
    % display the tracking result in the first frame and in every 5th frame
    % (displayResult_sf is a script and works on the variables of this loop)
    if (mod(f,5)==0|| f==1)
        displayResult_sf;
        snapnow;
    end
    %% Update Scheme
    % Dictionary $D$ remains the same for the whole sequence. Every 5 frames
    % updateDic returns new negative templates (A_neg) and a new template
    % histogram (alpha_qq); the histogram is updated as a convex linear
    % combination of old and new ones.
    upRate = 5;
    if rem(f, upRate)==0
        % last argument: b(id_max)/lambda_thr is the number of occluded
        % patches of the best candidate, divided by the total patch count
        [A_neg, alpha_qq] = updateDic(dataPath, sz, opt, param, num_n, forMat, p0, f, abs(alpha_q), abs(alpha_p(:,:,id_max)), (b(id_max)/lambda_thr)/prod(patchnum));
    end
end
%% Save and Display Tracking Results
% Store the per-frame affine parameters in <title>.mat. title is used here
% as a variable (presumably set by trackparam -- verify), not as the
% MATLAB plotting function of the same name.
outFile = [title '.mat'];
save(outFile, 'result');
% displayResult; % display the tracking results in the whole image sequence
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment