Skip to content

Instantly share code, notes, and snippets.

@pilhoon
Last active January 25, 2018 17:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pilhoon/b6f9850578f26dcf8f8b58ac5249d48a to your computer and use it in GitHub Desktop.
Save pilhoon/b6f9850578f26dcf8f8b58ac5249d48a to your computer and use it in GitHub Desktop.
% test data generation
snr = 7;
xy = [awgn(repmat(1, 100, 1), snr) awgn(repmat(2, 100, 1), snr);
awgn(repmat(3, 100, 1), snr) awgn(repmat(4, 100, 1), snr)];
S = squareform(pdist(xy)); % distance matrix
S = -S + max(S(:))+9;
S(logical(eye(200))) = 0;
% affinity propagation
% http://www.psi.toronto.edu/affinitypropagation/FreyDueckScience07.pdf
N=size(S,1); A=zeros(N,N); R=zeros(N,N); % Initialize messages
S=S+1e-12*randn(N,N)*(max(S(:))-min(S(:))); % Remove degeneracies
lam=0.5; % Set damping factor
for iter=1:1000000
%
% Compute responsibilities (formula (1))
%
Rold=R;
AS=A+S;
[Y,I]=max(AS,[],2); %row max. I is index.
for i=1:N
AS(i,I(i))=-realmax; % remove the largest. ref.realmax=-1.7977e+308
end;
[Y2,I2]=max(AS,[],2); %second max
R=S-repmat(Y,[1,N]); % S - {first max}
for i=1:N
R(i,I(i))=S(i,I(i))-Y2(i); % {first max} - {second max}
end;
R=(1-lam)*R+lam*Rold; % Dampen responsibilities
%
% Compute availabilities (formula (2), (3))
%
Aold=A;
Rp=max(R,0);
for k=1:N
Rp(k,k)=R(k,k); % set diagnal to 0. and give R(k,k)-term of formula (2)
end;
A=repmat(sum(Rp,1),[N,1])-Rp; %'sigma column except me'
dA=diag(A);
A=min(A,0);
for k=1:N
A(k,k)=dA(k);
end;
A=(1-lam)*A+lam*Aold; % Dampen availabilities
end;
fprintf('end. iter = %d, K = %d \n', iter, K);
E=R+A; % Pseudomarginals
I=find(diag(E)>0);
K=length(I); % Indices of exemplars
fprintf('K = %d\n', K);
[tmp, c]=max(S(:,I),[],2);
c(I)=1:K;
idx=I(c); % Assignments
scatter(xy(:,1), xy(:,2), [], c);
title(K);
% [idx,netsim,dpsim,expref]=apclusterLeveraged(s,p,frac,sweeps)
%
% frac is the fraction of data points to be considered.
% sweeps is the number of times to run affinity propagation.
%
function [idx,netsim,dpsim,expref]=apclusterLeveraged(S,p,frac,sweeps);
% p(i) indicates the preference that data point i be chosen as an exemplar.
% Often a good choice is to set all preferences to median(s);
%
% dpsim = the sum of the similarities of the data points to their exemplars
% expref = the sum of the preferences of the identified exemplars
% netsim = dpsim + expref
%
% C = zeros(size(S),'uint8'); % for counting S-computations
if isa(S,'function_handle'), N=S(0); else N=size(S,1); end; M=ceil(frac*N);
s=zeros(M*N+N,3);
s(1:M*N,1)=repmat([1:N]',[M,1]);
% diagonal part
s(M*N+(1:N),1)=(1:N)';
s(M*N+(1:N),2)=(1:N)';
s(M*N+(1:N),3)=realmin('single'); % 1.1755e-38 : noise
ii=randperm(N); ii=ii(1:M);
netsimbest=-1e300; netsimbest=-Inf;
for sw=1:sweeps
% make sparse distance matrix.
% rows = use all(=N) points.
% cols = only picks M points out of N points
for m=1:M
s((m-1)*N+1:m*N,2)=ii(m);
s((m-1)*N+1:m*N,3)=S(1:N,ii(m))+realmin('single');
% C(1:N,ii(m)) = C(1:N,ii(m))+1; % counting S-computations
end;
% run sparse.
[idx,netsim,dpsim,expref]=apclustermex(sparse(s(:,1),s(:,2),s(:,3),N,N),p);
fprintf('.');
if netsim > netsimbest | isinf(netsimbest), % should allow updates if netsim=-Inf;
netsimbest=netsim;
idxbest=idx; dpsimbest=dpsim; exprefbest=expref;
tmp1=unique(idx)';
tmp2=setdiff([1:N],tmp1);
ii=[tmp1,tmp2(randperm(length(tmp2)))]; % sampling and shuffle
ii=ii(1:M);
else
tmp1=unique(idxbest)';
tmp2=setdiff([1:N],tmp1);
ii=[tmp1,tmp2(randperm(length(tmp2)))];
ii=ii(1:M);
end;
end;
idx=idxbest; netsim=netsimbest; dpsim=dpsimbest; expref=exprefbest;
N=100; x=rand(N,2); % Create N, 2-D data points
M=N*N-N; s=zeros(M,3); % Make ALL N^2-N similarities
j=1;
for i=1:N
for k=[1:i-1,i+1:N]
s(j,1)=i; s(j,2)=k; s(j,3)=-sum((x(i,:)-x(k,:)).^2);
j=j+1;
end;
end;
p=median(s(:,3)); % Set preference to median similarity
[idx,netsim,dpsim,expref]=apclusterSparse(s,p,'plot');
fprintf('Number of clusters: %d\n',length(unique(idx)));
fprintf('Fitness (net similarity): %f\n',netsim);
figure; % Make a figures showing the data and the clusters
for i=unique(idx)'
ii=find(idx==i); h=plot(x(ii,1),x(ii,2),'o'); hold on;
col=rand(1,3); set(h,'Color',col,'MarkerFaceColor',col);
xi1=x(i,1)*ones(size(ii)); xi2=x(i,2)*ones(size(ii));
line([x(ii,1),xi1]',[x(ii,2),xi2]','Color',col);
end;
axis equal tight;
function [idx,netsim,dpsim,expref]=apclusterSparse(s,p,varargin);
% Handle arguments to function
if nargin<2 error('Too few input arguments');
else
maxits=1000; convits=100; lam=0.9; plt=0; details=0; nonoise=0;
i=1;
while i<=length(varargin)
if strcmp(varargin{i},'plot')
plt=1; i=i+1;
elseif strcmp(varargin{i},'details')
details=1; i=i+1;
elseif strcmp(varargin{i},'nonoise')
nonoise=1; i=i+1;
elseif strcmp(varargin{i},'maxits')
maxits=varargin{i+1};
i=i+2;
if maxits<=0 error('maxits must be a positive integer'); end;
elseif strcmp(varargin{i},'convits')
convits=varargin{i+1};
i=i+2;
if convits<=0 error('convits must be a positive integer'); end;
elseif strcmp(varargin{i},'dampfact')
lam=varargin{i+1};
i=i+2;
if (lam<0.5)||(lam>=1)
error('dampfact must be >= 0.5 and < 1');
end;
else i=i+1;
end;
end;
end;
if lam>0.9
fprintf('\n*** Warning: Large damping factor in use. Turn on plotting\n');
fprintf(' to monitor the net similarity. The algorithm will\n');
fprintf(' change decisions slowly, so consider using a larger value\n');
fprintf(' of convits.\n\n');
end;
% Check that standard arguments are consistent in size
if length(size(s))~=2 error('s should be a 2D matrix');
elseif length(size(p))>2 error('p should be a vector or a scalar');
elseif size(s,2)==3
tmp=max(max(s(:,1)),max(s(:,2)));
if length(p)==1 N=tmp; else N=length(p); end;
if tmp>N
error('data point index exceeds number of data points');
elseif min(min(s(:,1)),min(s(:,2)))<=0
error('data point indices must be >= 1');
end;
elseif size(s,1)==size(s,2)
N=size(s,1);
if (length(p)~=N)&&(length(p)~=1)
error('p should be scalar or a vector of size N');
end;
else error('s must have 3 columns or be square'); end;
% Make vector of preferences
if length(p)==1
p=p*ones(N,1);
end;
% Append self-similarities (preferences) to s-matrix
tmps=[repmat([1:N]',[1,2]),p];
s=[s;tmps];
M=size(s,1);
% In case user did not remove degeneracies from the input similarities,
% avoid degenerate solutions by adding a small amount of noise to the
% input similarities
if ~nonoise
rns=randn('state');
randn('state',0);
s(:,3)=s(:,3)+(eps*s(:,3)+realmin*100).*rand(M,1);
randn('state',rns);
end;
% Construct indices of neighbors % refer my comment below.
ind1e=zeros(N,1);
for j=1:M
k=s(j,1);
ind1e(k)=ind1e(k)+1;
end;
ind1e=cumsum(ind1e);
ind1s=[1;ind1e(1:end-1)+1];
ind1=zeros(M,1);
for j=1:M
k=s(j,1);
ind1(ind1s(k))=j;
ind1s(k)=ind1s(k)+1;
end;
ind1s=[1;ind1e(1:end-1)+1];
ind2e=zeros(N,1);
for j=1:M
k=s(j,2);
ind2e(k)=ind2e(k)+1;
end;
ind2e=cumsum(ind2e);
ind2s=[1;ind2e(1:end-1)+1];
ind2=zeros(M,1);
for j=1:M
k=s(j,2);
ind2(ind2s(k))=j;
ind2s(k)=ind2s(k)+1;
end;
ind2s=[1;ind2e(1:end-1)+1];
% Allocate space for messages, etc
A=zeros(M,1);
R=zeros(M,1);
t=1;
if plt
netsim=zeros(1,maxits+1);
end;
if details
idx=zeros(N,maxits+1);
netsim=zeros(1,maxits+1);
dpsim=zeros(1,maxits+1);
expref=zeros(1,maxits+1);
end;
% Execute parallel affinity propagation updates
e=zeros(N,convits);
dn=0;
i=0;
while ~dn
i=i+1;
% Compute responsibilities
for j=1:N
ss=s(ind1(ind1s(j):ind1e(j)),3);
as=A(ind1(ind1s(j):ind1e(j)))+ss;
[Y,I]=max(as);
as(I)=-realmax;
[Y2,I2]=max(as);
r=ss-Y;
r(I)=ss(I)-Y2;
R(ind1(ind1s(j):ind1e(j)))=(1-lam)*r+ ...
lam*R(ind1(ind1s(j):ind1e(j)));
end;
% Compute availabilities
for j=1:N
rp=R(ind2(ind2s(j):ind2e(j)));
rp(1:end-1)=max(rp(1:end-1),0);
a=sum(rp)-rp;
a(1:end-1)=min(a(1:end-1),0);
A(ind2(ind2s(j):ind2e(j)))=(1-lam)*a+ ...
lam*A(ind2(ind2s(j):ind2e(j)));
end;
% Check for convergence
E=((A(M-N+1:M)+R(M-N+1:M))>0);
e(:,mod(i-1,convits)+1)=E;
K=sum(E);
if i>=convits || i>=maxits
se=sum(e,2);
unconverged=(sum((se==convits)+(se==0))~=N);
if (~unconverged&&(K>0))||(i==maxits)
dn=1;
end;
end;
% Handle plotting and storage of details, if requested
if plt||details
if K==0
tmpnetsim=nan; tmpdpsim=nan; tmpexpref=nan; tmpidx=nan;
else
tmpidx=zeros(N,1); tmpdpsim=0;
tmpidx(find(E))=find(E); tmpexpref=sum(p(find(E)));
discon=0;
for j=find(E==0)'
ss=s(ind1(ind1s(j):ind1e(j)),3);
ii=s(ind1(ind1s(j):ind1e(j)),2);
ee=find(E(ii));
if length(ee)==0 discon=1;
else
[smx imx]=max(ss(ee));
tmpidx(j)=ii(ee(imx));
tmpdpsim=tmpdpsim+smx;
end;
end;
if discon
tmpnetsim=nan; tmpdpsim=nan; tmpexpref=nan; tmpidx=nan;
else tmpnetsim=tmpdpsim+tmpexpref;
end;
end;
end;
if details
netsim(i)=tmpnetsim; dpsim(i)=tmpdpsim; expref(i)=tmpexpref;
idx(:,i)=tmpidx;
end;
if plt
netsim(i)=tmpnetsim;
figure(234);
tmp=1:i; tmpi=find(~isnan(netsim(1:i)));
plot(tmp(tmpi),netsim(tmpi),'r-');
xlabel('# Iterations');
ylabel('Net similarity of quantized intermediate solution');
drawnow;
end;
end;
% Identify exemplars
E=((A(M-N+1:M)+R(M-N+1:M))>0); K=sum(E);
if K>0
tmpidx=zeros(N,1); tmpidx(find(E))=find(E); % Identify clusters
for j=find(E==0)'
ss=s(ind1(ind1s(j):ind1e(j)),3);
ii=s(ind1(ind1s(j):ind1e(j)),2);
ee=find(E(ii));
[smx imx]=max(ss(ee));
tmpidx(j)=ii(ee(imx));
end;
EE=zeros(N,1);
for j=find(E)'
jj=find(tmpidx==j); mx=-Inf;
ns=zeros(N,1); msk=zeros(N,1);
for m=jj'
mm=s(ind1(ind1s(m):ind1e(m)),2);
msk(mm)=msk(mm)+1;
ns(mm)=ns(mm)+s(ind1(ind1s(m):ind1e(m)),3);
end;
ii=jj(find(msk(jj)==length(jj)));
[smx imx]=max(ns(ii));
EE(ii(imx))=1;
end;
E=EE;
tmpidx=zeros(N,1); tmpdpsim=0;
tmpidx(find(E))=find(E); tmpexpref=sum(p(find(E)));
for j=find(E==0)'
ss=s(ind1(ind1s(j):ind1e(j)),3);
ii=s(ind1(ind1s(j):ind1e(j)),2);
ee=find(E(ii));
[smx imx]=max(ss(ee));
tmpidx(j)=ii(ee(imx));
tmpdpsim=tmpdpsim+smx;
end;
tmpnetsim=tmpdpsim+tmpexpref;
else
tmpidx=nan*ones(N,1); tmpnetsim=nan; tmpexpref=nan;
end;
if details
netsim(i+1)=tmpnetsim; netsim=netsim(1:i+1);
dpsim(i+1)=tmpnetsim-tmpexpref; dpsim=dpsim(1:i+1);
expref(i+1)=tmpexpref; expref=expref(1:i+1);
idx(:,i+1)=tmpidx; idx=idx(:,1:i+1);
else
netsim=tmpnetsim; dpsim=tmpnetsim-tmpexpref;
expref=tmpexpref; idx=tmpidx;
end;
if plt||details
fprintf('\nNumber of identified clusters: %d\n',K);
fprintf('Fitness (net similarity): %f\n',tmpnetsim);
fprintf(' Similarities of data points to exemplars: %f\n',dpsim(end));
fprintf(' Preferences of selected exemplars: %f\n',tmpexpref);
fprintf('Number of iterations: %d\n\n',i);
end;
if unconverged
fprintf('\n*** Warning: Algorithm did not converge. The similarities\n');
fprintf(' may contain degeneracies - add noise to the similarities\n');
fprintf(' to remove degeneracies. To monitor the net similarity,\n');
fprintf(' activate plotting. Also, consider increasing maxits and\n');
fprintf(' if necessary dampfact.\n\n');
end;
snr = 7;
xy = [awgn(repmat(1, 5, 1), snr) awgn(repmat(2, 5, 1), snr);
awgn(repmat(3, 5, 1), snr) awgn(repmat(4, 5, 1), snr)];
S = zeros(10*9/2, 3);
C = combnk(1:10, 2);
%>> xy
%
%xy =
%
% 0.4872 2.0837
% 1.0468 1.9632
% 1.3226 1.1366
% 2.1549 1.8039
% 0.7021 1.1983
% 3.3754 3.7318
% 2.6033 4.2189
% 3.0447 4.3303
% 2.7568 4.7647
% 3.1356 3.9133
for i=1:length(C)
xi = C(i,1);
yi = C(i,2);
d = norm(xy(xi, :)-xy(yi, :)); %this is falsy. d should be -norm(...)
S(i, :) = [C(i,1), C(i,2), d];
end
%>> S
%
%S =
%
% 1 2 0.57245
% 1 3 1.2629
% 1 4 1.691
% 1 5 0.91104
% 1 6 3.3253
% 1 7 3.0061
% 1 8 3.4041
% 1 9 3.5126
% 1 10 3.2189
% 2 3 0.87139
% 2 4 1.1194
% 2 5 0.83891
% 2 6 2.9241
% 2 7 2.7406
% 2 8 3.0975
% 2 9 3.2821
% 2 10 2.8576
% 3 4 1.0668
% 3 5 0.62358
% 3 6 3.309
% 3 7 3.3378
% 3 8 3.6284
% 3 9 3.9013
% 3 10 3.3162
% 4 5 1.5739
% 4 6 2.2818
% 4 7 2.4562
% 4 8 2.6785
% 4 9 3.0213
% 4 10 2.3262
% 5 6 3.6831
% 5 7 3.5691
% 5 8 3.9111
% 5 9 4.1159
% 5 10 3.6459
% 6 7 0.91282
% 6 8 0.6837
% 6 9 1.2039
% 6 10 0.30071
% 7 8 0.45522
% 7 9 0.56697
% 7 10 0.61373
% 8 9 0.52117
% 8 10 0.42676
% 9 10 0.93185
apclusterSparse(S, 0);
[(1:55)' s ind1 ind2]
ans =
1 1 2 1.2908 1 46
2 1 3 0.98971 2 1
3 1 4 1.3646 3 47
4 1 5 0.53551 4 2
5 1 6 3.0272 5 10
6 1 7 3.1811 6 48
7 1 8 2.6518 7 3
8 1 9 2.6847 8 11
9 1 10 2.9787 9 18
10 2 3 1.4746 46 49
11 2 4 0.19874 10 4
12 2 5 1.2703 11 12
13 2 6 3.1465 12 19
14 2 7 3.4175 13 25
15 2 8 2.9135 14 50
16 2 9 2.7438 15 5
17 2 10 3.1162 16 13
18 3 4 1.4072 17 20
19 3 5 0.45646 47 26
20 3 6 2.0407 18 31
21 3 7 2.2126 19 51
22 3 8 1.6809 20 6
23 3 9 1.6951 21 14
24 3 10 1.9934 22 21
25 4 5 1.2638 23 27
26 4 6 2.9858 24 32
27 4 7 3.2659 48 36
28 4 8 2.7697 25 52
29 4 9 2.5816 26 7
30 4 10 2.9576 27 15
31 5 6 2.4971 28 22
32 5 7 2.6632 29 28
33 5 8 2.132 30 33
34 5 9 2.1501 49 37
35 5 10 2.4495 31 40
36 6 7 0.36396 32 53
37 6 8 0.45161 33 8
38 6 9 0.40532 34 16
39 6 10 0.059503 35 23
40 7 8 0.53177 50 29
41 7 9 0.72777 36 34
42 7 10 0.35529 37 38
43 8 9 0.44005 38 41
44 8 10 0.39225 39 43
45 9 10 0.38415 51 54
46 1 1 3.4908e-307 40 9
47 2 2 7.0924e-307 41 17
48 3 3 1.0571e-307 42 24
49 4 4 5.8453e-307 52 30
50 5 5 1.4614e-306 43 35
51 6 6 9.2654e-307 44 39
52 7 7 5.9746e-307 53 42
53 8 8 1.1337e-306 45 44
54 9 9 6.0503e-307 54 45
55 10 10 1.7434e-306 55 55
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment