Skip to content

Instantly share code, notes, and snippets.

@edisonqkj
Created December 12, 2013 07:41
Show Gist options
  • Save edisonqkj/7924482 to your computer and use it in GitHub Desktop.
Save edisonqkj/7924482 to your computer and use it in GitHub Desktop.
Hashing for Cosine Similarity
%% Hashing for Cosine Similarity
% This M-file was written as a learning exercise for Locality Sensitive Hashing.
% Enjoy! Questions and problem reports are welcome!
%
% Time: 2013/12/12
% Released by: edisonqkj
% E-mail: edison90110@gmail.com
% References:
% http://www.cs.jhu.edu/~vandurme/papers/VanDurmeLallACL10-slides.pdf
% https://gist.github.com/greeness/94a3d425009be0f94751
%
function [h,t]=LSH(dimension,num_plane,isdraw)
%LSH Compare hashed and exact angular similarity of two random vectors.
%   [h,t] = LSH(dimension,num_plane) draws two standard-normal row vectors
%   of length DIMENSION and NUM_PLANE standard-normal projection vectors,
%   computes a 0/1 signature of each sample against the projections, and
%   returns:
%     h - fraction of projections on which the two signatures agree
%     t - exact angular similarity of the two samples, 1 - theta/pi
%
%   [h,t] = LSH(dimension,num_plane,isdraw) with a true ISDRAW also plots
%   the first two coordinates of every projection vector, coloured by the
%   signature bits of the two samples. Drawing needs DIMENSION >= 2.
%   ISDRAW defaults to false when omitted.
%
%   Example: [h,t] = LSH(2, 2^10, true);

if nargin<3
    isdraw=false;
end
% Clears the command window, as the original implementation did.
clc;

%% Main
sample1=randn(1,dimension);
sample2=randn(1,dimension);
proj_plane=randn(num_plane,dimension);
res1=signature(sample1,proj_plane);
res2=signature(sample2,proj_plane);
h=hash_similarity(res1,res2);
t=angular_similarity(sample1,sample2);

%% Draw 2D similarity results
% Every projection vector is drawn as a dot, coloured by the signature
% bits it produced for the two samples:
%   Green: bit is 1 for sample1 and 0 for sample2.
%   Blue : bit is 0 for sample1 and 1 for sample2.
%   Red  : bit is 1 for both samples.
%   Black: bit is 0 for both samples.
if isdraw
    if dimension<2
        error('LSH:dimension','Drawing requires dimension >= 2.');
    end
    x=proj_plane(:,1);
    y=proj_plane(:,2);

    masks={(res1==1)&(res2==0), ...
           (res1==0)&(res2==1), ...
           (res1==1)&(res2==1), ...
           (res1==0)&(res2==0)};
    styles={'g.','b.','r.','k.'};
    names={'Sample1','Sample2','Shared','None'};

    % newplot prepares the axes exactly as the first plot call would,
    % so hold can be switched on before any category is drawn.
    newplot;
    hold on;
    handles=[];
    labels={};
    for k=1:numel(masks)
        % An empty category creates no line object; skip it so that the
        % legend labels cannot shift onto the wrong objects.
        if any(masks{k})
            hk=plot(x(masks{k}),y(masks{k}),styles{k});
            handles=[handles;hk]; %#ok<AGROW>
            labels{end+1}=names{k}; %#ok<AGROW>
        end
    end

    % The two sample vectors: a thick line from the origin plus a circle
    % marker at the tip.
    plot([0,sample1(1)],[0,sample1(2)],'g-','LineWidth',3);
    plot([0,sample2(1)],[0,sample2(2)],'b-','LineWidth',3);
    plot(sample1(1),sample1(2),'go','MarkerSize',12);
    plot(sample2(1),sample2(2),'bo','MarkerSize',12);
    hold off;
    grid on;
    if ~isempty(handles)
        % Explicit handles tie each label to its own category.
        legend(handles,labels);
    end
    title(['Hash Similarity: ' num2str(h) ' True Similarity: ' num2str(t)]);
end
end
function [res]=signature(sample,planes)
%SIGNATURE 0/1 signature of a vector against a set of projection vectors.
%   res = signature(sample,planes) returns a 1-by-N row vector of doubles,
%   where N = size(planes,1). res(i) is 1 when the dot product of SAMPLE
%   with planes(i,:) is >= 0, and 0 otherwise.
%
%   SAMPLE is a vector whose length equals size(planes,2). When PLANES has
%   zero rows the result is a 1-by-0 empty row vector.

% One matrix product evaluates every dot product at once; sample(:) makes
% the operand a column regardless of the orientation passed in.
projections=planes*sample(:);
% Transpose to a row and convert the logical mask to double 0/1 values.
res=double(projections>=0).';
end
function [res]=hash_similarity(v1,v2)
%HASH_SIMILARITY Share of positions at which two signatures agree.
%   res = hash_similarity(v1,v2) counts the element-wise matches between
%   V1 and V2 and divides the count by the number of columns of V1.
agree=(v1==v2);
n_cols=size(v1,2);
res=sum(agree)/n_cols;
end
function [res]=angular_similarity(v1,v2)
%ANGULAR_SIMILARITY Angular similarity of two vectors, 1 - theta/pi.
%   res = angular_similarity(v1,v2) computes the angle theta between the
%   vectors V1 and V2 and returns 1 - theta/pi: 1 for vectors pointing the
%   same way, 0.5 for orthogonal vectors, 0 for opposite vectors.
%
%   If either vector has zero norm the cosine is 0/0 and the result is NaN.

inner=v1(:)'*v2(:);
cos_theta=inner/(norm(v1)*norm(v2));
% Rounding can push the cosine marginally outside [-1,1], where acos
% returns a complex value; clamp it back. NaN passes through unchanged.
cos_theta=max(-1,min(1,cos_theta));
theta=acos(cos_theta);
res=1-theta/pi;
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment