Skip to content

Instantly share code, notes, and snippets.

@caub
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save caub/79c0db3523f63944ac43 to your computer and use it in GitHub Desktop.
Save caub/79c0db3523f63944ac43 to your computer and use it in GitHub Desktop.
function model = lda(X, y)
% conversion of scikit-learn https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/lda.py
% X =[-1 -1; -2 -1;-3 -2;1 1;2 1; 3 2];
% y = [1 1 1 2 2 2];
% m = lda(X,y);
% m.predict([-0.8 -1]) %1
[model.classes,~,y] = unique(y);
[nsamples,nfeatures] = size(X);
nclasses = length(model.classes);
model.priors = histc(y, model.classes)'/nsamples;
% means: nclasses*nfeatures matrix
model.means = zeros(nclasses,size(X,2));
Xc = zeros(size(X));
model.cov = zeros(nfeatures, nfeatures);
for i=model.classes
Xg = X(y==i,:);
model.means(i,:) = mean(Xg);
Xc(y==i,:) = bsxfun(@minus, Xg, model.means(i,:));
model.cov = model.cov + Xc(i)'*Xc(i);
end
model.cov = model.cov/(nsamples - nclasses);
% 1) within (univariate) scaling by with classes std-dev
model.std = sqrt(sum(Xc.^2)/size(Xc,1));
% avoid division by zero in normalization
model.std(model.std==0) = 1;
fac = 1/(nsamples - nclasses);
% 2) Within variance scaling
X = sqrt(fac) * bsxfun(@rdivide, Xc, model.std );
% SVD of centered (within)scaled data
[U,S,V] = svd(X);
rank_ = rank(S);
S = diag(S);
if rank_<nfeatures
'Collinear variables'
end
% Scaling of within covariance is: V' 1/S
scalings = bsxfun(@rdivide, bsxfun(@rdivide, V(1:rank_,:), model.std)', S(1:rank_)');
% Between variance scaling
% Overall mean
model.xbar = model.priors * model.means;
% Scale weighted centers
X = bsxfun(@times, sqrt(nsamples * model.priors * fac), bsxfun(@minus,model.means,model.xbar)')' * scalings;
% Centers are living in a space with nclasses-1 dim (maximum)
% Use svd to find projection in the space spanned by the nclasses centers
[~, S, V] = svd(X);
rank_ = rank(S)
% compose the scalings
model.scalings = scalings * V(1:rank_,:)';
scalings
model.scalings
% weight vectors / centroids
model.coef = bsxfun(@minus,model.means,model.xbar) * model.scalings;
model.coef
model.intercept = bsxfun(@plus, -0.5*sum(model.coef .^ 2, 2)', log(model.priors));
model.decision_function = @(X) bsxfun(@plus, bsxfun(@times, bsxfun(@minus,X,model.xbar)*model.scalings, model.coef'), model.intercept);
model.project = @(X,n) (X-model.xbar)*model.scalings*model.coef(1:n,:)';

lda

X =[-1 -1; -2 -1;-3 -2;1 1;2 1; 3 2];
y = [1 1 1 2 2 2];
m = lda(X,y);
m.predict([-0.8 -1]) %1
gscatter(X(:,1),X(:,2),y','rb','v^',[],'off');
hold on
subtract = @(X) X(:,2) - X(:,1);
ezplot(@(x,y) subtract(m.decision_function([x y])))

% ----
g1=gmdistribution([1 2], [.02 -.05;-.05 .8]);
g2=gmdistribution([1.5 1], [.02 .05;.05 .9]);
X = [g1.random(100); g2.random(90)];
y = [repmat(1, 1, 100) repmat(2, 1, 90)];

m=lda(X, y);
gscatter(X(:,1),X(:,2),y','rb','v^',[],'off');
hold on
subtract = @(X) X(:,2) - X(:,1);
ezplot(@(x,y) subtract(m.decision_function([x y])))

d = m.decision_function([1 2;1 1;1 0;1 -1]);
[~,i]=max(d,[],2)
predictions = m.classes(i)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment