pilhoon/ap.mat

## ap.mat
% test data generation
snr = 7;
xy = [awgn(repmat(1, 100, 1), snr) awgn(repmat(2, 100, 1), snr);
      awgn(repmat(3, 100, 1), snr) awgn(repmat(4, 100, 1), snr)];
S = squareform(pdist(xy)); % distance matrix
S = -S + max(S(:))+9;
S(logical(eye(200))) = 0;

% affinity propagation
% http://www.psi.toronto.edu/affinitypropagation/FreyDueckScience07.pdf

N=size(S,1); A=zeros(N,N); R=zeros(N,N); % Initialize messages
S=S+1e-12*randn(N,N)*(max(S(:))-min(S(:))); % Remove degeneracies
lam=0.5; % Set damping factor
for iter=1:1000000
    %
    % Compute responsibilities (formula (1))
    %
    Rold=R;
    AS=A+S;
    [Y,I]=max(AS,[],2); %row max. I is index.
    for i=1:N
        AS(i,I(i))=-realmax; % remove the largest. ref.realmax=-1.7977e+308
    end;
    [Y2,I2]=max(AS,[],2); %second max
    R=S-repmat(Y,[1,N]); % S - {first max}
    for i=1:N
        R(i,I(i))=S(i,I(i))-Y2(i);  % {first max} - {second max}
    end;
    R=(1-lam)*R+lam*Rold; % Dampen responsibilities

    %
    % Compute availabilities (formula (2), (3))
    %
    Aold=A;
    Rp=max(R,0);
    for k=1:N
        Rp(k,k)=R(k,k);  % set diagnal to 0. and give R(k,k)-term of formula (2)
    end;
    A=repmat(sum(Rp,1),[N,1])-Rp; %'sigma column except me'
    dA=diag(A);
    A=min(A,0);
    for k=1:N
        A(k,k)=dA(k);
    end;
    A=(1-lam)*A+lam*Aold; % Dampen availabilities
end;
fprintf('end. iter = %d, K = %d \n', iter, K);
E=R+A; % Pseudomarginals
I=find(diag(E)>0);
K=length(I); % Indices of exemplars

fprintf('K = %d\n', K);
[tmp, c]=max(S(:,I),[],2);
c(I)=1:K;
idx=I(c); % Assignments


scatter(xy(:,1), xy(:,2), [], c);
title(K);

## leveraged_ap.mat
% [idx,netsim,dpsim,expref]=apclusterLeveraged(s,p,frac,sweeps)
%
% frac is the fraction of data points to be considered.
% sweeps is the number of times to run affinity propagation.
%

function [idx,netsim,dpsim,expref]=apclusterLeveraged(S,p,frac,sweeps);
% p(i) indicates the preference that data point i be chosen as an exemplar.
% Often a good choice is to set all preferences to median(s);

%
% dpsim = the sum of the similarities of the data points to their exemplars
% expref = the sum of the preferences of the identified exemplars
% netsim = dpsim + expref
%

% C = zeros(size(S),'uint8'); % for counting S-computations
if isa(S,'function_handle'), N=S(0); else N=size(S,1); end; M=ceil(frac*N);
s=zeros(M*N+N,3);
s(1:M*N,1)=repmat([1:N]',[M,1]);

% diagonal part
s(M*N+(1:N),1)=(1:N)';
s(M*N+(1:N),2)=(1:N)';
s(M*N+(1:N),3)=realmin('single'); % 1.1755e-38 : noise


ii=randperm(N); ii=ii(1:M);
netsimbest=-1e300; netsimbest=-Inf;
for sw=1:sweeps
    % make sparse distance matrix.
    %   rows = use all(=N) points.
    %   cols = only picks M points out of N points
    for m=1:M
        s((m-1)*N+1:m*N,2)=ii(m);
        s((m-1)*N+1:m*N,3)=S(1:N,ii(m))+realmin('single');
%       C(1:N,ii(m)) = C(1:N,ii(m))+1;  % counting S-computations
    end;

    % run sparse.
    [idx,netsim,dpsim,expref]=apclustermex(sparse(s(:,1),s(:,2),s(:,3),N,N),p);
    fprintf('.');
    if netsim > netsimbest | isinf(netsimbest), % should allow updates if netsim=-Inf;
        netsimbest=netsim;
        idxbest=idx; dpsimbest=dpsim; exprefbest=expref;
        tmp1=unique(idx)';
        tmp2=setdiff([1:N],tmp1);
        ii=[tmp1,tmp2(randperm(length(tmp2)))]; % sampling and shuffle
        ii=ii(1:M);
    else
        tmp1=unique(idxbest)';
        tmp2=setdiff([1:N],tmp1);
        ii=[tmp1,tmp2(randperm(length(tmp2)))];
        ii=ii(1:M);
    end;
end;
idx=idxbest; netsim=netsimbest; dpsim=dpsimbest; expref=exprefbest;

## sparse_eg1.mat
N=100; x=rand(N,2); % Create N, 2-D data points
M=N*N-N; s=zeros(M,3); % Make ALL N^2-N similarities
j=1;
for i=1:N
  for k=[1:i-1,i+1:N]
    s(j,1)=i; s(j,2)=k; s(j,3)=-sum((x(i,:)-x(k,:)).^2);
    j=j+1;
  end;
end;
p=median(s(:,3)); % Set preference to median similarity
[idx,netsim,dpsim,expref]=apclusterSparse(s,p,'plot');
fprintf('Number of clusters: %d\n',length(unique(idx)));
fprintf('Fitness (net similarity): %f\n',netsim);
figure; % Make a figures showing the data and the clusters
for i=unique(idx)'
  ii=find(idx==i); h=plot(x(ii,1),x(ii,2),'o'); hold on;
  col=rand(1,3); set(h,'Color',col,'MarkerFaceColor',col);
  xi1=x(i,1)*ones(size(ii)); xi2=x(i,2)*ones(size(ii));
  line([x(ii,1),xi1]',[x(ii,2),xi2]','Color',col);
end;
axis equal tight;

## sparse_eg2.mat
function [idx,netsim,dpsim,expref]=apclusterSparse(s,p,varargin);

% Handle arguments to function
if nargin<2 error('Too few input arguments');
else
    maxits=1000; convits=100; lam=0.9; plt=0; details=0; nonoise=0;
    i=1;
    while i<=length(varargin)
        if strcmp(varargin{i},'plot')
            plt=1; i=i+1;
        elseif strcmp(varargin{i},'details')
            details=1; i=i+1;
        elseif strcmp(varargin{i},'nonoise')
            nonoise=1; i=i+1;
        elseif strcmp(varargin{i},'maxits')
            maxits=varargin{i+1};
            i=i+2;
            if maxits<=0 error('maxits must be a positive integer'); end;
        elseif strcmp(varargin{i},'convits')
            convits=varargin{i+1};
            i=i+2;
            if convits<=0 error('convits must be a positive integer'); end;
        elseif strcmp(varargin{i},'dampfact')
            lam=varargin{i+1};
            i=i+2;
            if (lam<0.5)||(lam>=1)
                error('dampfact must be >= 0.5 and < 1');
            end;
        else i=i+1;
        end;
    end;
end;
if lam>0.9
    fprintf('\n*** Warning: Large damping factor in use. Turn on plotting\n');
    fprintf('    to monitor the net similarity. The algorithm will\n');
    fprintf('    change decisions slowly, so consider using a larger value\n');
    fprintf('    of convits.\n\n');
end;

% Check that standard arguments are consistent in size
if length(size(s))~=2 error('s should be a 2D matrix');
elseif length(size(p))>2 error('p should be a vector or a scalar');
elseif size(s,2)==3
    tmp=max(max(s(:,1)),max(s(:,2)));
    if length(p)==1 N=tmp; else N=length(p); end;
    if tmp>N
        error('data point index exceeds number of data points');
    elseif min(min(s(:,1)),min(s(:,2)))<=0
        error('data point indices must be >= 1');
    end;
elseif size(s,1)==size(s,2)
    N=size(s,1);
    if (length(p)~=N)&&(length(p)~=1)
        error('p should be scalar or a vector of size N');
    end;
else error('s must have 3 columns or be square'); end;

% Make vector of preferences
if length(p)==1
    p=p*ones(N,1);
end;

% Append self-similarities (preferences) to s-matrix
tmps=[repmat([1:N]',[1,2]),p];
s=[s;tmps];
M=size(s,1);

% In case user did not remove degeneracies from the input similarities,
% avoid degenerate solutions by adding a small amount of noise to the
% input similarities
if ~nonoise
    rns=randn('state');
    randn('state',0);
    s(:,3)=s(:,3)+(eps*s(:,3)+realmin*100).*rand(M,1);
    randn('state',rns);
end;

% Construct indices of neighbors % refer my comment below.
ind1e=zeros(N,1);
for j=1:M
    k=s(j,1);
    ind1e(k)=ind1e(k)+1;
end;
ind1e=cumsum(ind1e);
ind1s=[1;ind1e(1:end-1)+1];
ind1=zeros(M,1);

for j=1:M
    k=s(j,1);
    ind1(ind1s(k))=j;
    ind1s(k)=ind1s(k)+1;
end;
ind1s=[1;ind1e(1:end-1)+1];
ind2e=zeros(N,1);

for j=1:M
    k=s(j,2);
    ind2e(k)=ind2e(k)+1;
end;
ind2e=cumsum(ind2e);
ind2s=[1;ind2e(1:end-1)+1];
ind2=zeros(M,1);

for j=1:M
    k=s(j,2);
    ind2(ind2s(k))=j;
    ind2s(k)=ind2s(k)+1;
end;

ind2s=[1;ind2e(1:end-1)+1];

% Allocate space for messages, etc
A=zeros(M,1);
R=zeros(M,1);
t=1;
if plt
    netsim=zeros(1,maxits+1);
end;
if details
    idx=zeros(N,maxits+1);
    netsim=zeros(1,maxits+1);
    dpsim=zeros(1,maxits+1);
    expref=zeros(1,maxits+1);
end;

% Execute parallel affinity propagation updates
e=zeros(N,convits);
dn=0;
i=0;
while ~dn
    i=i+1;

    % Compute responsibilities
    for j=1:N
        ss=s(ind1(ind1s(j):ind1e(j)),3);
        as=A(ind1(ind1s(j):ind1e(j)))+ss;

        [Y,I]=max(as);
        as(I)=-realmax;
        [Y2,I2]=max(as);

        r=ss-Y;
        r(I)=ss(I)-Y2;

        R(ind1(ind1s(j):ind1e(j)))=(1-lam)*r+ ...
            lam*R(ind1(ind1s(j):ind1e(j)));
    end;

    % Compute availabilities
    for j=1:N
        rp=R(ind2(ind2s(j):ind2e(j)));
        rp(1:end-1)=max(rp(1:end-1),0);
        a=sum(rp)-rp;
        a(1:end-1)=min(a(1:end-1),0);
        A(ind2(ind2s(j):ind2e(j)))=(1-lam)*a+ ...
            lam*A(ind2(ind2s(j):ind2e(j)));
    end;

    % Check for convergence
    E=((A(M-N+1:M)+R(M-N+1:M))>0);
    e(:,mod(i-1,convits)+1)=E;
    K=sum(E);
    if i>=convits || i>=maxits
        se=sum(e,2);
        unconverged=(sum((se==convits)+(se==0))~=N);
        if (~unconverged&&(K>0))||(i==maxits)
            dn=1;
        end;
    end;

    % Handle plotting and storage of details, if requested
    if plt||details
        if K==0
            tmpnetsim=nan; tmpdpsim=nan; tmpexpref=nan; tmpidx=nan;
        else
            tmpidx=zeros(N,1); tmpdpsim=0;
            tmpidx(find(E))=find(E); tmpexpref=sum(p(find(E)));
            discon=0;
            for j=find(E==0)'
                ss=s(ind1(ind1s(j):ind1e(j)),3);
                ii=s(ind1(ind1s(j):ind1e(j)),2);
                ee=find(E(ii));
                if length(ee)==0 discon=1;
                else
                    [smx imx]=max(ss(ee));
                    tmpidx(j)=ii(ee(imx));
                    tmpdpsim=tmpdpsim+smx;
                end;
            end;
            if discon
                tmpnetsim=nan; tmpdpsim=nan; tmpexpref=nan; tmpidx=nan;
            else tmpnetsim=tmpdpsim+tmpexpref;
            end;
        end;
    end;
    if details
        netsim(i)=tmpnetsim; dpsim(i)=tmpdpsim; expref(i)=tmpexpref;
        idx(:,i)=tmpidx;
    end;
    if plt
        netsim(i)=tmpnetsim;
        figure(234);
        tmp=1:i; tmpi=find(~isnan(netsim(1:i)));
        plot(tmp(tmpi),netsim(tmpi),'r-');
        xlabel('# Iterations');
        ylabel('Net similarity of quantized intermediate solution');
        drawnow;
    end;
end;
% Identify  exemplars
E=((A(M-N+1:M)+R(M-N+1:M))>0); K=sum(E);
if K>0
    tmpidx=zeros(N,1); tmpidx(find(E))=find(E); % Identify clusters
    for j=find(E==0)'
        ss=s(ind1(ind1s(j):ind1e(j)),3);
        ii=s(ind1(ind1s(j):ind1e(j)),2);
        ee=find(E(ii));
        [smx imx]=max(ss(ee));
        tmpidx(j)=ii(ee(imx));
    end;
    EE=zeros(N,1);
    for j=find(E)'
        jj=find(tmpidx==j); mx=-Inf;
        ns=zeros(N,1); msk=zeros(N,1);
        for m=jj'
            mm=s(ind1(ind1s(m):ind1e(m)),2);
            msk(mm)=msk(mm)+1;
            ns(mm)=ns(mm)+s(ind1(ind1s(m):ind1e(m)),3);
        end;
        ii=jj(find(msk(jj)==length(jj)));
        [smx imx]=max(ns(ii));
        EE(ii(imx))=1;
    end;
    E=EE;
    tmpidx=zeros(N,1); tmpdpsim=0;
    tmpidx(find(E))=find(E); tmpexpref=sum(p(find(E)));
    for j=find(E==0)'
        ss=s(ind1(ind1s(j):ind1e(j)),3);
        ii=s(ind1(ind1s(j):ind1e(j)),2);
        ee=find(E(ii));
        [smx imx]=max(ss(ee));
        tmpidx(j)=ii(ee(imx));
        tmpdpsim=tmpdpsim+smx;
    end;
    tmpnetsim=tmpdpsim+tmpexpref;
else
    tmpidx=nan*ones(N,1); tmpnetsim=nan; tmpexpref=nan;
end;
if details
    netsim(i+1)=tmpnetsim; netsim=netsim(1:i+1);
    dpsim(i+1)=tmpnetsim-tmpexpref; dpsim=dpsim(1:i+1);
    expref(i+1)=tmpexpref; expref=expref(1:i+1);
    idx(:,i+1)=tmpidx; idx=idx(:,1:i+1);
else
    netsim=tmpnetsim; dpsim=tmpnetsim-tmpexpref;
    expref=tmpexpref; idx=tmpidx;
end;
if plt||details
    fprintf('\nNumber of identified clusters: %d\n',K);
    fprintf('Fitness (net similarity): %f\n',tmpnetsim);
    fprintf('  Similarities of data points to exemplars: %f\n',dpsim(end));
    fprintf('  Preferences of selected exemplars: %f\n',tmpexpref);
    fprintf('Number of iterations: %d\n\n',i);
end;
if unconverged
    fprintf('\n*** Warning: Algorithm did not converge. The similarities\n');
    fprintf('    may contain degeneracies - add noise to the similarities\n');
    fprintf('    to remove degeneracies. To monitor the net similarity,\n');
    fprintf('    activate plotting. Also, consider increasing maxits and\n');
    fprintf('    if necessary dampfact.\n\n');
end;

## sparse_eg3
snr = 7;
xy = [awgn(repmat(1, 5, 1), snr) awgn(repmat(2, 5, 1), snr);
      awgn(repmat(3, 5, 1), snr) awgn(repmat(4, 5, 1), snr)];
S = zeros(10*9/2, 3);
C = combnk(1:10, 2);

%>> xy
%
%xy =
%
%    0.4872    2.0837
%    1.0468    1.9632
%    1.3226    1.1366
%    2.1549    1.8039
%    0.7021    1.1983
%    3.3754    3.7318
%    2.6033    4.2189
%    3.0447    4.3303
%    2.7568    4.7647
%    3.1356    3.9133

for i=1:length(C)
    xi = C(i,1);
    yi = C(i,2);
    d = norm(xy(xi, :)-xy(yi, :)); %this is falsy. d should be -norm(...)
    S(i, :) = [C(i,1), C(i,2), d];
end

%>> S
%
%S =
%
%            1            2      0.57245
%            1            3       1.2629
%            1            4        1.691
%            1            5      0.91104
%            1            6       3.3253
%            1            7       3.0061
%            1            8       3.4041
%            1            9       3.5126
%            1           10       3.2189
%            2            3      0.87139
%            2            4       1.1194
%            2            5      0.83891
%            2            6       2.9241
%            2            7       2.7406
%            2            8       3.0975
%            2            9       3.2821
%            2           10       2.8576
%            3            4       1.0668
%            3            5      0.62358
%            3            6        3.309
%            3            7       3.3378
%            3            8       3.6284
%            3            9       3.9013
%            3           10       3.3162
%            4            5       1.5739
%            4            6       2.2818
%            4            7       2.4562
%            4            8       2.6785
%            4            9       3.0213
%            4           10       2.3262
%            5            6       3.6831
%            5            7       3.5691
%            5            8       3.9111
%            5            9       4.1159
%            5           10       3.6459
%            6            7      0.91282
%            6            8       0.6837
%            6            9       1.2039
%            6           10      0.30071
%            7            8      0.45522
%            7            9      0.56697
%            7           10      0.61373
%            8            9      0.52117
%            8           10      0.42676
%            9           10      0.93185

apclusterSparse(S, 0);

## sparse_eg4
[(1:55)' s ind1 ind2]
ans =
            1            1            2       1.2908            1           46
            2            1            3      0.98971            2            1
            3            1            4       1.3646            3           47
            4            1            5      0.53551            4            2
            5            1            6       3.0272            5           10
            6            1            7       3.1811            6           48
            7            1            8       2.6518            7            3
            8            1            9       2.6847            8           11
            9            1           10       2.9787            9           18
           10            2            3       1.4746           46           49
           11            2            4      0.19874           10            4
           12            2            5       1.2703           11           12
           13            2            6       3.1465           12           19
           14            2            7       3.4175           13           25
           15            2            8       2.9135           14           50
           16            2            9       2.7438           15            5
           17            2           10       3.1162           16           13
           18            3            4       1.4072           17           20
           19            3            5      0.45646           47           26
           20            3            6       2.0407           18           31
           21            3            7       2.2126           19           51
           22            3            8       1.6809           20            6
           23            3            9       1.6951           21           14
           24            3           10       1.9934           22           21
           25            4            5       1.2638           23           27
           26            4            6       2.9858           24           32
           27            4            7       3.2659           48           36
           28            4            8       2.7697           25           52
           29            4            9       2.5816           26            7
           30            4           10       2.9576           27           15
           31            5            6       2.4971           28           22
           32            5            7       2.6632           29           28
           33            5            8        2.132           30           33
           34            5            9       2.1501           49           37
           35            5           10       2.4495           31           40
           36            6            7      0.36396           32           53
           37            6            8      0.45161           33            8
           38            6            9      0.40532           34           16
           39            6           10     0.059503           35           23
           40            7            8      0.53177           50           29
           41            7            9      0.72777           36           34
           42            7           10      0.35529           37           38
           43            8            9      0.44005           38           41
           44            8           10      0.39225           39           43
           45            9           10      0.38415           51           54
           46            1            1  3.4908e-307           40            9
           47            2            2  7.0924e-307           41           17
           48            3            3  1.0571e-307           42           24
           49            4            4  5.8453e-307           52           30
           50            5            5  1.4614e-306           43           35
           51            6            6  9.2654e-307           44           39
           52            7            7  5.9746e-307           53           42
           53            8            8  1.1337e-306           45           44
           54            9            9  6.0503e-307           54           45
           55           10           10  1.7434e-306           55           55
	% test data generation
	snr = 7;
	xy = [awgn(repmat(1, 100, 1), snr) awgn(repmat(2, 100, 1), snr);
	awgn(repmat(3, 100, 1), snr) awgn(repmat(4, 100, 1), snr)];
	S = squareform(pdist(xy)); % distance matrix
	S = -S + max(S(:))+9;
	S(logical(eye(200))) = 0;

	% affinity propagation
	% http://www.psi.toronto.edu/affinitypropagation/FreyDueckScience07.pdf

	N=size(S,1); A=zeros(N,N); R=zeros(N,N); % Initialize messages
	S=S+1e-12randn(N,N)(max(S(:))-min(S(:))); % Remove degeneracies
	lam=0.5; % Set damping factor
	for iter=1:1000000
	%
	% Compute responsibilities (formula (1))
	%
	Rold=R;
	AS=A+S;
	[Y,I]=max(AS,[],2); %row max. I is index.
	for i=1:N
	AS(i,I(i))=-realmax; % remove the largest. ref.realmax=-1.7977e+308
	end;
	[Y2,I2]=max(AS,[],2); %second max
	R=S-repmat(Y,[1,N]); % S - {first max}
	for i=1:N
	R(i,I(i))=S(i,I(i))-Y2(i); % {first max} - {second max}
	end;
	R=(1-lam)R+lamRold; % Dampen responsibilities

	%
	% Compute availabilities (formula (2), (3))
	%
	Aold=A;
	Rp=max(R,0);
	for k=1:N
	Rp(k,k)=R(k,k); % set diagnal to 0. and give R(k,k)-term of formula (2)
	end;
	A=repmat(sum(Rp,1),[N,1])-Rp; %'sigma column except me'
	dA=diag(A);
	A=min(A,0);
	for k=1:N
	A(k,k)=dA(k);
	end;
	A=(1-lam)A+lamAold; % Dampen availabilities
	end;
	fprintf('end. iter = %d, K = %d \n', iter, K);
	E=R+A; % Pseudomarginals
	I=find(diag(E)>0);
	K=length(I); % Indices of exemplars

	fprintf('K = %d\n', K);
	[tmp, c]=max(S(:,I),[],2);
	c(I)=1:K;
	idx=I(c); % Assignments


	scatter(xy(:,1), xy(:,2), [], c);
	title(K);
	% [idx,netsim,dpsim,expref]=apclusterLeveraged(s,p,frac,sweeps)
	%
	% frac is the fraction of data points to be considered.
	% sweeps is the number of times to run affinity propagation.
	%

	function [idx,netsim,dpsim,expref]=apclusterLeveraged(S,p,frac,sweeps);
	% p(i) indicates the preference that data point i be chosen as an exemplar.
	% Often a good choice is to set all preferences to median(s);

	%
	% dpsim = the sum of the similarities of the data points to their exemplars
	% expref = the sum of the preferences of the identified exemplars
	% netsim = dpsim + expref
	%

	% C = zeros(size(S),'uint8'); % for counting S-computations
	if isa(S,'function_handle'), N=S(0); else N=size(S,1); end; M=ceil(frac*N);
	s=zeros(M*N+N,3);
	s(1:M*N,1)=repmat([1:N]',[M,1]);

	% diagonal part
	s(M*N+(1:N),1)=(1:N)';
	s(M*N+(1:N),2)=(1:N)';
	s(M*N+(1:N),3)=realmin('single'); % 1.1755e-38 : noise


	ii=randperm(N); ii=ii(1:M);
	netsimbest=-1e300; netsimbest=-Inf;
	for sw=1:sweeps
	% make sparse distance matrix.
	% rows = use all(=N) points.
	% cols = only picks M points out of N points
	for m=1:M
	s((m-1)N+1:mN,2)=ii(m);
	s((m-1)N+1:mN,3)=S(1:N,ii(m))+realmin('single');
	% C(1:N,ii(m)) = C(1:N,ii(m))+1; % counting S-computations
	end;

	% run sparse.
	[idx,netsim,dpsim,expref]=apclustermex(sparse(s(:,1),s(:,2),s(:,3),N,N),p);
	fprintf('.');
	if netsim > netsimbest \| isinf(netsimbest), % should allow updates if netsim=-Inf;
	netsimbest=netsim;
	idxbest=idx; dpsimbest=dpsim; exprefbest=expref;
	tmp1=unique(idx)';
	tmp2=setdiff([1:N],tmp1);
	ii=[tmp1,tmp2(randperm(length(tmp2)))]; % sampling and shuffle
	ii=ii(1:M);
	else
	tmp1=unique(idxbest)';
	tmp2=setdiff([1:N],tmp1);
	ii=[tmp1,tmp2(randperm(length(tmp2)))];
	ii=ii(1:M);
	end;
	end;
	idx=idxbest; netsim=netsimbest; dpsim=dpsimbest; expref=exprefbest;
	N=100; x=rand(N,2); % Create N, 2-D data points
	M=N*N-N; s=zeros(M,3); % Make ALL N^2-N similarities
	j=1;
	for i=1:N
	for k=[1:i-1,i+1:N]
	s(j,1)=i; s(j,2)=k; s(j,3)=-sum((x(i,:)-x(k,:)).^2);
	j=j+1;
	end;
	end;
	p=median(s(:,3)); % Set preference to median similarity
	[idx,netsim,dpsim,expref]=apclusterSparse(s,p,'plot');
	fprintf('Number of clusters: %d\n',length(unique(idx)));
	fprintf('Fitness (net similarity): %f\n',netsim);
	figure; % Make a figures showing the data and the clusters
	for i=unique(idx)'
	ii=find(idx==i); h=plot(x(ii,1),x(ii,2),'o'); hold on;
	col=rand(1,3); set(h,'Color',col,'MarkerFaceColor',col);
	xi1=x(i,1)ones(size(ii)); xi2=x(i,2)ones(size(ii));
	line([x(ii,1),xi1]',[x(ii,2),xi2]','Color',col);
	end;
	axis equal tight;
	function [idx,netsim,dpsim,expref]=apclusterSparse(s,p,varargin);

	% Handle arguments to function
	if nargin<2 error('Too few input arguments');
	else
	maxits=1000; convits=100; lam=0.9; plt=0; details=0; nonoise=0;
	i=1;
	while i<=length(varargin)
	if strcmp(varargin{i},'plot')
	plt=1; i=i+1;
	elseif strcmp(varargin{i},'details')
	details=1; i=i+1;
	elseif strcmp(varargin{i},'nonoise')
	nonoise=1; i=i+1;
	elseif strcmp(varargin{i},'maxits')
	maxits=varargin{i+1};
	i=i+2;
	if maxits<=0 error('maxits must be a positive integer'); end;
	elseif strcmp(varargin{i},'convits')
	convits=varargin{i+1};
	i=i+2;
	if convits<=0 error('convits must be a positive integer'); end;
	elseif strcmp(varargin{i},'dampfact')
	lam=varargin{i+1};
	i=i+2;
	if (lam<0.5)\|\|(lam>=1)
	error('dampfact must be >= 0.5 and < 1');
	end;
	else i=i+1;
	end;
	end;
	end;
	if lam>0.9
	fprintf('\n*** Warning: Large damping factor in use. Turn on plotting\n');
	fprintf(' to monitor the net similarity. The algorithm will\n');
	fprintf(' change decisions slowly, so consider using a larger value\n');
	fprintf(' of convits.\n\n');
	end;

	% Check that standard arguments are consistent in size
	if length(size(s))~=2 error('s should be a 2D matrix');
	elseif length(size(p))>2 error('p should be a vector or a scalar');
	elseif size(s,2)==3
	tmp=max(max(s(:,1)),max(s(:,2)));
	if length(p)==1 N=tmp; else N=length(p); end;
	if tmp>N
	error('data point index exceeds number of data points');
	elseif min(min(s(:,1)),min(s(:,2)))<=0
	error('data point indices must be >= 1');
	end;
	elseif size(s,1)==size(s,2)
	N=size(s,1);
	if (length(p)~=N)&&(length(p)~=1)
	error('p should be scalar or a vector of size N');
	end;
	else error('s must have 3 columns or be square'); end;

	% Make vector of preferences
	if length(p)==1
	p=p*ones(N,1);
	end;

	% Append self-similarities (preferences) to s-matrix
	tmps=[repmat([1:N]',[1,2]),p];
	s=[s;tmps];
	M=size(s,1);

	% In case user did not remove degeneracies from the input similarities,
	% avoid degenerate solutions by adding a small amount of noise to the
	% input similarities
	if ~nonoise
	rns=randn('state');
	randn('state',0);
	s(:,3)=s(:,3)+(epss(:,3)+realmin100).*rand(M,1);
	randn('state',rns);
	end;

	% Construct indices of neighbors % refer my comment below.
	ind1e=zeros(N,1);
	for j=1:M
	k=s(j,1);
	ind1e(k)=ind1e(k)+1;
	end;
	ind1e=cumsum(ind1e);
	ind1s=[1;ind1e(1:end-1)+1];
	ind1=zeros(M,1);

	for j=1:M
	k=s(j,1);
	ind1(ind1s(k))=j;
	ind1s(k)=ind1s(k)+1;
	end;
	ind1s=[1;ind1e(1:end-1)+1];
	ind2e=zeros(N,1);

	for j=1:M
	k=s(j,2);
	ind2e(k)=ind2e(k)+1;
	end;
	ind2e=cumsum(ind2e);
	ind2s=[1;ind2e(1:end-1)+1];
	ind2=zeros(M,1);

	for j=1:M
	k=s(j,2);
	ind2(ind2s(k))=j;
	ind2s(k)=ind2s(k)+1;
	end;

	ind2s=[1;ind2e(1:end-1)+1];

	% Allocate space for messages, etc
	A=zeros(M,1);
	R=zeros(M,1);
	t=1;
	if plt
	netsim=zeros(1,maxits+1);
	end;
	if details
	idx=zeros(N,maxits+1);
	netsim=zeros(1,maxits+1);
	dpsim=zeros(1,maxits+1);
	expref=zeros(1,maxits+1);
	end;

	% Execute parallel affinity propagation updates
	e=zeros(N,convits);
	dn=0;
	i=0;
	while ~dn
	i=i+1;

	% Compute responsibilities
	for j=1:N
	ss=s(ind1(ind1s(j):ind1e(j)),3);
	as=A(ind1(ind1s(j):ind1e(j)))+ss;

	[Y,I]=max(as);
	as(I)=-realmax;
	[Y2,I2]=max(as);

	r=ss-Y;
	r(I)=ss(I)-Y2;

	R(ind1(ind1s(j):ind1e(j)))=(1-lam)*r+ ...
	lam*R(ind1(ind1s(j):ind1e(j)));
	end;

	% Compute availabilities
	for j=1:N
	rp=R(ind2(ind2s(j):ind2e(j)));
	rp(1:end-1)=max(rp(1:end-1),0);
	a=sum(rp)-rp;
	a(1:end-1)=min(a(1:end-1),0);
	A(ind2(ind2s(j):ind2e(j)))=(1-lam)*a+ ...
	lam*A(ind2(ind2s(j):ind2e(j)));
	end;

	% Check for convergence
	E=((A(M-N+1:M)+R(M-N+1:M))>0);
	e(:,mod(i-1,convits)+1)=E;
	K=sum(E);
	if i>=convits \|\| i>=maxits
	se=sum(e,2);
	unconverged=(sum((se==convits)+(se==0))~=N);
	if (~unconverged&&(K>0))\|\|(i==maxits)
	dn=1;
	end;
	end;

	% Handle plotting and storage of details, if requested
	if plt\|\|details
	if K==0
	tmpnetsim=nan; tmpdpsim=nan; tmpexpref=nan; tmpidx=nan;
	else
	tmpidx=zeros(N,1); tmpdpsim=0;
	tmpidx(find(E))=find(E); tmpexpref=sum(p(find(E)));
	discon=0;
	for j=find(E==0)'
	ss=s(ind1(ind1s(j):ind1e(j)),3);
	ii=s(ind1(ind1s(j):ind1e(j)),2);
	ee=find(E(ii));
	if length(ee)==0 discon=1;
	else
	[smx imx]=max(ss(ee));
	tmpidx(j)=ii(ee(imx));
	tmpdpsim=tmpdpsim+smx;
	end;
	end;
	if discon
	tmpnetsim=nan; tmpdpsim=nan; tmpexpref=nan; tmpidx=nan;
	else tmpnetsim=tmpdpsim+tmpexpref;
	end;
	end;
	end;
	if details
	netsim(i)=tmpnetsim; dpsim(i)=tmpdpsim; expref(i)=tmpexpref;
	idx(:,i)=tmpidx;
	end;
	if plt
	netsim(i)=tmpnetsim;
	figure(234);
	tmp=1:i; tmpi=find(~isnan(netsim(1:i)));
	plot(tmp(tmpi),netsim(tmpi),'r-');
	xlabel('# Iterations');
	ylabel('Net similarity of quantized intermediate solution');
	drawnow;
	end;
	end;
	% Identify exemplars
	E=((A(M-N+1:M)+R(M-N+1:M))>0); K=sum(E);
	if K>0
	tmpidx=zeros(N,1); tmpidx(find(E))=find(E); % Identify clusters
	for j=find(E==0)'
	ss=s(ind1(ind1s(j):ind1e(j)),3);
	ii=s(ind1(ind1s(j):ind1e(j)),2);
	ee=find(E(ii));
	[smx imx]=max(ss(ee));
	tmpidx(j)=ii(ee(imx));
	end;
	EE=zeros(N,1);
	for j=find(E)'
	jj=find(tmpidx==j); mx=-Inf;
	ns=zeros(N,1); msk=zeros(N,1);
	for m=jj'
	mm=s(ind1(ind1s(m):ind1e(m)),2);
	msk(mm)=msk(mm)+1;
	ns(mm)=ns(mm)+s(ind1(ind1s(m):ind1e(m)),3);
	end;
	ii=jj(find(msk(jj)==length(jj)));
	[smx imx]=max(ns(ii));
	EE(ii(imx))=1;
	end;
	E=EE;
	tmpidx=zeros(N,1); tmpdpsim=0;
	tmpidx(find(E))=find(E); tmpexpref=sum(p(find(E)));
	for j=find(E==0)'
	ss=s(ind1(ind1s(j):ind1e(j)),3);
	ii=s(ind1(ind1s(j):ind1e(j)),2);
	ee=find(E(ii));
	[smx imx]=max(ss(ee));
	tmpidx(j)=ii(ee(imx));
	tmpdpsim=tmpdpsim+smx;
	end;
	tmpnetsim=tmpdpsim+tmpexpref;
	else
	tmpidx=nan*ones(N,1); tmpnetsim=nan; tmpexpref=nan;
	end;
	if details
	netsim(i+1)=tmpnetsim; netsim=netsim(1:i+1);
	dpsim(i+1)=tmpnetsim-tmpexpref; dpsim=dpsim(1:i+1);
	expref(i+1)=tmpexpref; expref=expref(1:i+1);
	idx(:,i+1)=tmpidx; idx=idx(:,1:i+1);
	else
	netsim=tmpnetsim; dpsim=tmpnetsim-tmpexpref;
	expref=tmpexpref; idx=tmpidx;
	end;
	if plt\|\|details
	fprintf('\nNumber of identified clusters: %d\n',K);
	fprintf('Fitness (net similarity): %f\n',tmpnetsim);
	fprintf(' Similarities of data points to exemplars: %f\n',dpsim(end));
	fprintf(' Preferences of selected exemplars: %f\n',tmpexpref);
	fprintf('Number of iterations: %d\n\n',i);
	end;
	if unconverged
	fprintf('\n*** Warning: Algorithm did not converge. The similarities\n');
	fprintf(' may contain degeneracies - add noise to the similarities\n');
	fprintf(' to remove degeneracies. To monitor the net similarity,\n');
	fprintf(' activate plotting. Also, consider increasing maxits and\n');
	fprintf(' if necessary dampfact.\n\n');
	end;
	snr = 7;
	xy = [awgn(repmat(1, 5, 1), snr) awgn(repmat(2, 5, 1), snr);
	awgn(repmat(3, 5, 1), snr) awgn(repmat(4, 5, 1), snr)];
	S = zeros(10*9/2, 3);
	C = combnk(1:10, 2);

	%>> xy
	%
	%xy =
	%
	% 0.4872 2.0837
	% 1.0468 1.9632
	% 1.3226 1.1366
	% 2.1549 1.8039
	% 0.7021 1.1983
	% 3.3754 3.7318
	% 2.6033 4.2189
	% 3.0447 4.3303
	% 2.7568 4.7647
	% 3.1356 3.9133

	for i=1:length(C)
	xi = C(i,1);
	yi = C(i,2);
	d = norm(xy(xi, :)-xy(yi, :)); %this is falsy. d should be -norm(...)
	S(i, :) = [C(i,1), C(i,2), d];
	end

	%>> S
	%
	%S =
	%
	% 1 2 0.57245
	% 1 3 1.2629
	% 1 4 1.691
	% 1 5 0.91104
	% 1 6 3.3253
	% 1 7 3.0061
	% 1 8 3.4041
	% 1 9 3.5126
	% 1 10 3.2189
	% 2 3 0.87139
	% 2 4 1.1194
	% 2 5 0.83891
	% 2 6 2.9241
	% 2 7 2.7406
	% 2 8 3.0975
	% 2 9 3.2821
	% 2 10 2.8576
	% 3 4 1.0668
	% 3 5 0.62358
	% 3 6 3.309
	% 3 7 3.3378
	% 3 8 3.6284
	% 3 9 3.9013
	% 3 10 3.3162
	% 4 5 1.5739
	% 4 6 2.2818
	% 4 7 2.4562
	% 4 8 2.6785
	% 4 9 3.0213
	% 4 10 2.3262
	% 5 6 3.6831
	% 5 7 3.5691
	% 5 8 3.9111
	% 5 9 4.1159
	% 5 10 3.6459
	% 6 7 0.91282
	% 6 8 0.6837
	% 6 9 1.2039
	% 6 10 0.30071
	% 7 8 0.45522
	% 7 9 0.56697
	% 7 10 0.61373
	% 8 9 0.52117
	% 8 10 0.42676
	% 9 10 0.93185

	apclusterSparse(S, 0);
	[(1:55)' s ind1 ind2]
	ans =
	1 1 2 1.2908 1 46
	2 1 3 0.98971 2 1
	3 1 4 1.3646 3 47
	4 1 5 0.53551 4 2
	5 1 6 3.0272 5 10
	6 1 7 3.1811 6 48
	7 1 8 2.6518 7 3
	8 1 9 2.6847 8 11
	9 1 10 2.9787 9 18
	10 2 3 1.4746 46 49
	11 2 4 0.19874 10 4
	12 2 5 1.2703 11 12
	13 2 6 3.1465 12 19
	14 2 7 3.4175 13 25
	15 2 8 2.9135 14 50
	16 2 9 2.7438 15 5
	17 2 10 3.1162 16 13
	18 3 4 1.4072 17 20
	19 3 5 0.45646 47 26
	20 3 6 2.0407 18 31
	21 3 7 2.2126 19 51
	22 3 8 1.6809 20 6
	23 3 9 1.6951 21 14
	24 3 10 1.9934 22 21
	25 4 5 1.2638 23 27
	26 4 6 2.9858 24 32
	27 4 7 3.2659 48 36
	28 4 8 2.7697 25 52
	29 4 9 2.5816 26 7
	30 4 10 2.9576 27 15
	31 5 6 2.4971 28 22
	32 5 7 2.6632 29 28
	33 5 8 2.132 30 33
	34 5 9 2.1501 49 37
	35 5 10 2.4495 31 40
	36 6 7 0.36396 32 53
	37 6 8 0.45161 33 8
	38 6 9 0.40532 34 16
	39 6 10 0.059503 35 23
	40 7 8 0.53177 50 29
	41 7 9 0.72777 36 34
	42 7 10 0.35529 37 38
	43 8 9 0.44005 38 41
	44 8 10 0.39225 39 43
	45 9 10 0.38415 51 54
	46 1 1 3.4908e-307 40 9
	47 2 2 7.0924e-307 41 17
	48 3 3 1.0571e-307 42 24
	49 4 4 5.8453e-307 52 30
	50 5 5 1.4614e-306 43 35
	51 6 6 9.2654e-307 44 39
	52 7 7 5.9746e-307 53 42
	53 8 8 1.1337e-306 45 44
	54 9 9 6.0503e-307 54 45
	55 10 10 1.7434e-306 55 55